Manually set working directory to source file location (Session Tab)
Add R-squared
Add double single support
Add Interaction
Sensitivity analysis/cluster by participant
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.3
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'purrr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.3
## Warning: package 'forcats' was built under R version 4.1.3
## Warning: package 'lubridate' was built under R version 4.1.3
## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v forcats 1.0.0 v readr 2.1.4
## v ggplot2 3.4.4 v stringr 1.5.0
## v lubridate 1.9.2 v tibble 3.2.1
## v purrr 1.0.1 v tidyr 1.3.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## i Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(ggplot2)
library(broom)
## Warning: package 'broom' was built under R version 4.1.3
# analysis folder
analysis_version <- '005'
# video metric folder version
metrics_version <- '004'
output_dir <- file.path("C:/Users/mmccu/Box/MM_Personal/5_Projects/BoveLab/3_Data_and_Code/gait_bw_zeno_home_analysis",
analysis_version,
"003_regression_video_metric_vs_outcomes")
scatter_dir <- file.path(output_dir, "scatterplots")
Significance based transparency
univariate_regression_plot <- function(results, plot_title, x_adj) {
# Apply rounding function to p-value columns
results <- results %>% mutate(pval_text = paste0("p=", sapply(p.value, format_p_value))) # Format p-values
# Apply formatting function and create significance indicators
results <- results %>%
mutate(
pval_text = paste0("p=", sapply(p.value, format_p_value)),
sig = ifelse(p.value < 0.05, "Significant", "Non-Significant"), # Create significance group
alpha_level = ifelse(p.value < 0.05, 1, 0.3) # More transparency for non-sig points
)
results
# Determine x position for p-values (offset to the right of the max estimate)
x_max <- max(results$conf.high, na.rm = TRUE) # Get max confidence interval upper bound
results <- results %>% mutate(pval_x_pos = x_max + x_adj) # Place p-values slightly to the right of max x range
p <- ggplot(results, aes(x = estimate, y = term, color = Variable, alpha = alpha_level))+
geom_point(size = 3) + # dot = coefficient estimate
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2) + # Whiskers for confidence intervals
geom_text(aes(x = pval_x_pos, label = pval_text, fontface = ifelse(p.value < 0.05, "bold", "plain")),
hjust = 1, size = 3) + # Right-aligned p-values, bold if significant
scale_alpha_identity() + # Use manually set alpha levels
geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") + # Reference line at zero
theme_minimal() +
labs(title = plot_title,
x = "Estimate (Effect Size)",
y = "Predictor") +
theme(legend.position = "right") # Hide legend to keep it clean
return(p)
}
# Conditional rounding function
format_p_value <- function(p) {
if (p < 0.001) {
return(formatC(p, format = "e", digits = 1)) # Scientific notation, 1 decimal place
} else {
return(formatC(p, format = "f", digits = 2)) # Standard notation, 2 decimal places
}
}
adj_vs_unadj_regression_plot <- function(results_df, plot_title, x_adj){
set.seed(42) # Set seed for reproducibility
results_df <- results_df %>%
mutate(
pval_text = paste0("p=", sapply(p.value, format_p_value)), # Format p-values
sig = ifelse(is.na(p.value) | p.value >= 0.05, "Non-Significant", "Significant"), # Identify significance
alpha_level = ifelse(is.na(p.value) | p.value >= 0.05, 0.3, 1), # More transparency for non-sig points
jitter_y = as.numeric(factor(term)) + runif(n(), -0.2, 0.2) # Jitter y-axis values slightly
)
# Define colors for models
model_colors <- c("Unadjusted" = "darkblue", "Adjusted" = "darkorange")
# Define shading by creating an index for each predictor group
results_df <- results_df %>%
arrange(term) %>%
mutate(group = as.integer(factor(term)) %% 2) # Alternating shading
# Determine x position for p-values (offset to the right of max estimate)
x_max <- max(results_df$conf.high, na.rm = TRUE) # Get max confidence interval upper bound
results_df <- results_df %>%
mutate(pval_x_pos = x_max + x_adj) # Place p-values slightly to the right of max x range
# Dot-and-whisker plot with enhancements
p2 <- ggplot(results_df, aes(x = estimate, y = term, color = Model, alpha = alpha_level)) +
# Add confidence intervals as horizontal whiskers
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2,
position = position_dodge(width = 0.5)) +
# Add dots for coefficient estimates
geom_point(position = position_dodge(width = 0.5), size = 3) +
# Add background shading for alternate rows
geom_rect(data = results_df, aes(ymin = as.numeric(factor(term)) - 0.5,
ymax = as.numeric(factor(term)) + 0.5,
xmin = -Inf, xmax = Inf,
fill = factor(group)),
inherit.aes = FALSE, alpha = 0.1) +
# Add vertical line at zero (null effect)
geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +
# Add jittered p-values at right side
geom_text(aes(x = pval_x_pos, y = jitter_y, label = pval_text,
fontface = ifelse(sig == "Significant", "bold", "plain")),
hjust = 1, size = 3) +
scale_color_manual(values = model_colors) + # Set custom colors for models
scale_fill_manual(values = c("white", "gray90")) + # Alternating shading
scale_alpha_identity() + # Use manually set alpha levels
theme_minimal() +
labs(title = plot_title,
x = "Estimate (Effect Size)",
y = "Predictor",
color = "Model") +
theme(legend.position = "right", # Move legend to the right
panel.grid.major = element_blank(), # Remove major grid lines for clarity
panel.grid.minor = element_blank())
return(p2)
}
All videos included in analysis. Videos were included based on if a walking segment could be identified. Some participants may have both a fast walk and preferred walking speed video. Some may have just fast walk or just preferred walking speed.
Each row = 1 video
Preferred walking speed
# Preferred Walking Speed
zeno_pws_path <- file.path("C:/Users/mmccu/Box/MM_Personal/5_Projects/BoveLab/3_Data_and_Code/gait_bw_zeno_home_analysis",
metrics_version,
"000_merged_cleaned_data",
"zv_bw_merged_gait_vertical_PWS_1_clean.csv")
zeno_pws_df <- read.csv(zeno_pws_path)
table(zeno_pws_df$task_pose)
##
## gait_vertical_PWS_1
## 224
White Not Hispanic as reference because highest %, rest alphabetical
# assign levels to categorical variables
table(zeno_pws_df$race_ethnicity_clean)
##
## Asian Black Or African American Hispanic or Latino
## 17 14 21
## Other/Unknown/Declined White Not Hispanic
## 18 154
zeno_pws_df$race_ethnicity_clean <- factor(zeno_pws_df$race_ethnicity_clean,
levels = c('White Not Hispanic',
'Asian',
'Black Or African American',
'Hispanic or Latino',
'Other/Unknown/Declined'),
ordered = FALSE)
print(levels(zeno_pws_df$race_ethnicity_clean))
## [1] "White Not Hispanic" "Asian"
## [3] "Black Or African American" "Hispanic or Latino"
## [5] "Other/Unknown/Declined"
table(zeno_pws_df$ms_dx_condensed)
##
## MS, Subtype Not Specified Progressive MS RRMS
## 3 40 181
zeno_pws_df$ms_dx_condensed <- factor(zeno_pws_df$ms_dx_condensed,
levels = c('RRMS',
'Progressive MS',
'MS, Subtype Not Specified'),
ordered = FALSE)
print(levels(zeno_pws_df$ms_dx_condensed))
## [1] "RRMS" "Progressive MS"
## [3] "MS, Subtype Not Specified"
table(zeno_pws_df$clean_sex)
##
## Female Male Non-Binary
## 168 54 2
zeno_pws_df$clean_sex <- factor(zeno_pws_df$clean_sex,
levels = c('Female',
'Male',
'Non-Binary'),
ordered = FALSE)
print(levels(zeno_pws_df$clean_sex))
## [1] "Female" "Male" "Non-Binary"
Drop healthy controls - no EDSS and T25FW
zeno_pws_df <- zeno_pws_df[grepl('MS', zeno_pws_df$demographic_diagnosis), ]
table(zeno_pws_df$demographic_diagnosis)
##
## MS
## 224
# convert infinite values to NA
zeno_pws_df[] <- lapply(zeno_pws_df, function(x) {
if (is.numeric(x)) replace(x, is.infinite(x), NA) else x
})
# filter to unique IDs - one row per person, select first video
nrow(zeno_pws_df)
## [1] 224
zeno_pws_uniqueid_df <- zeno_pws_df %>%
arrange(visit_date_video) %>% # Sort by date
distinct(bw_id, .keep_all = TRUE) # Keep the first occurrence of each ID
nrow(zeno_pws_uniqueid_df)
## [1] 154
Missing Data
# count number of missing variables in preferred walking speed data frame
sum(is.na(zeno_pws_df$delta_pix_h_rel_median_pose_zv))
## [1] 16
sum(is.na(zeno_pws_df$stride_time_mean_sec_pose_zv))
## [1] 48
sum(is.na(zeno_pws_df$mean_cadence_step_per_min_pose_zv))
## [1] 40
sum(is.na(zeno_pws_df$stride_width_mean_cm_pose_zv))
## [1] 40
# stance and all double support measures
sum(is.na(zeno_pws_df$foot1_gait_cycle_time_mean_pose_zv))
## [1] 163
# ground truth preferred walk zeno mat data
sum(is.na(zeno_pws_df$PWS_cadencestepsminmean))
## [1] 0
sum(is.na(zeno_pws_df$bingoEHR_EDSS_measure_value))
## [1] 0
sum(is.na(zeno_pws_df$msfcEHR_T25FW.SPEED.AVG))
## [1] 0
# demographics
sum(is.na(zeno_pws_df$demoEHR_Age))
## [1] 0
sum(is.na(zeno_pws_df$demoEHR_DiseaseDuration))
## [1] 0
sum(is.na(zeno_pws_df$ms_dx_condensed))
## [1] 0
sum(is.na(zeno_pws_df$clean_ethnicity))
## [1] 0
sum(is.na(zeno_pws_df$clean_sex))
## [1] 0
sum(is.na(zeno_pws_df$ms_dx_condensed))
## [1] 0
Fast walking speed videos
zeno_fw_path <- file.path("C:/Users/mmccu/Box/MM_Personal/5_Projects/BoveLab/3_Data_and_Code/gait_bw_zeno_home_analysis",
metrics_version,
"000_merged_cleaned_data",
"zv_bw_merged_gait_vertical_FW_1_clean.csv")
zeno_fw_df <- read.csv(zeno_fw_path)
table(zeno_fw_df$task_pose)
##
## gait_vertical_FW_1
## 222
# assign levels to categorical variables
table(zeno_fw_df$race_ethnicity_clean)
##
## Asian Black Or African American Hispanic or Latino
## 17 14 20
## Other/Unknown/Declined White Not Hispanic
## 18 153
zeno_fw_df$race_ethnicity_clean <- factor(zeno_fw_df$race_ethnicity_clean,
levels = c('White Not Hispanic',
'Asian',
'Black Or African American',
'Hispanic or Latino',
'Other/Unknown/Declined'),
ordered = FALSE)
print(levels(zeno_fw_df$race_ethnicity_clean))
## [1] "White Not Hispanic" "Asian"
## [3] "Black Or African American" "Hispanic or Latino"
## [5] "Other/Unknown/Declined"
table(zeno_fw_df$ms_dx_condensed)
##
## MS, Subtype Not Specified Progressive MS RRMS
## 2 39 181
zeno_fw_df$ms_dx_condensed <- factor(zeno_fw_df$ms_dx_condensed,
levels = c('RRMS',
'Progressive MS',
'MS, Subtype Not Specified'),
ordered = FALSE)
print(levels(zeno_fw_df$ms_dx_condensed))
## [1] "RRMS" "Progressive MS"
## [3] "MS, Subtype Not Specified"
table(zeno_fw_df$clean_sex)
##
## Female Male Non-Binary
## 167 53 2
zeno_fw_df$clean_sex <- factor(zeno_fw_df$clean_sex,
levels = c('Female',
'Male',
'Non-Binary'),
ordered = FALSE)
print(levels(zeno_fw_df$clean_sex))
## [1] "Female" "Male" "Non-Binary"
zeno_fw_df <- zeno_fw_df[grepl('MS', zeno_fw_df$demographic_diagnosis), ]
table(zeno_fw_df$demographic_diagnosis)
##
## MS
## 222
# convert infinite values to NA
zeno_fw_df[] <- lapply(zeno_fw_df, function(x) {
if (is.numeric(x)) replace(x, is.infinite(x), NA) else x
})
# filter to unique IDs - one row per person, select first video
nrow(zeno_fw_df)
## [1] 222
zeno_fw_uniqueid_df <- zeno_fw_df %>%
arrange(visit_date_video) %>% # Sort by date
distinct(bw_id, .keep_all = TRUE) # Keep the first occurrence of each ID
nrow(zeno_fw_uniqueid_df)
## [1] 154
# count number of missing variables in fast walking speed data frame
sum(is.na(zeno_fw_df$delta_pix_h_rel_median_pose_zv))
## [1] 3
sum(is.na(zeno_fw_df$stride_time_mean_sec_pose_zv))
## [1] 53
sum(is.na(zeno_fw_df$mean_cadence_step_per_min_pose_zv))
## [1] 46
sum(is.na(zeno_fw_df$stride_width_mean_cm_pose_zv))
## [1] 47
# stance and all double support measures
sum(is.na(zeno_fw_df$foot1_gait_cycle_time_mean_pose_zv))
## [1] 188
# ground truth preferred walk zeno mat data
sum(is.na(zeno_fw_df$PWS_cadencestepsminmean))
## [1] 0
sum(is.na(zeno_fw_df$bingoEHR_EDSS_measure_value))
## [1] 0
sum(is.na(zeno_fw_df$msfcEHR_T25FW.SPEED.AVG))
## [1] 0
# demographics
sum(is.na(zeno_fw_df$demoEHR_Age))
## [1] 0
sum(is.na(zeno_fw_df$demoEHR_DiseaseDuration))
## [1] 0
sum(is.na(zeno_fw_df$race_ethnicity_clean))
## [1] 0
sum(is.na(zeno_fw_df$clean_sex))
## [1] 0
sum(is.na(zeno_fw_df$ms_dx_condensed))
## [1] 0
Some participants have videos from multiple timepoints (baseline and follow up) At each timepoint, participants sent back two videos - one turning to the right (gait_vertical_right task) and one turning to the left (gait_vertical_left task)
home_path <- file.path("C:/Users/mmccu/Box/MM_Personal/5_Projects/BoveLab/3_Data_and_Code/gait_bw_zeno_home_analysis",
metrics_version,
"000_merged_cleaned_data",
"hv_bw_merged_clean.csv")
home_df <- read.csv(home_path)
nrow(home_df)
## [1] 65
table(home_df$demographic_diagnosis)
##
## MS
## 65
table(home_df$task_pose_hv)
##
## gait_vertical_left gait_vertical_right
## 32 33
# assign levels to categorical variables
table(home_df$race_ethnicity_clean)
##
## Asian Black Or African American Hispanic or Latino
## 4 2 2
## Other/Unknown/Declined White Not Hispanic
## 9 48
home_df$race_ethnicity_clean <- factor(home_df$race_ethnicity_clean,
levels = c('White Not Hispanic',
'Asian',
'Black Or African American',
'Hispanic or Latino',
'Other/Unknown/Declined'),
ordered = FALSE)
print(levels(home_df$race_ethnicity_clean))
## [1] "White Not Hispanic" "Asian"
## [3] "Black Or African American" "Hispanic or Latino"
## [5] "Other/Unknown/Declined"
table(home_df$ms_dx_condensed)
##
## MS, Subtype Not Specified Progressive MS RRMS
## 4 9 52
home_df$ms_dx_condensed <- factor(home_df$ms_dx_condensed,
levels = c('RRMS',
'Progressive MS',
'MS, Subtype Not Specified'),
ordered = FALSE)
print(levels(home_df$ms_dx_condensed))
## [1] "RRMS" "Progressive MS"
## [3] "MS, Subtype Not Specified"
table(home_df$clean_sex)
##
## Female Male Non-Binary
## 52 11 2
home_df$clean_sex <- factor(home_df$clean_sex,
levels = c('Female',
'Male',
'Non-Binary'),
ordered = FALSE)
print(levels(home_df$clean_sex))
## [1] "Female" "Male" "Non-Binary"
# convert infinite values to NA
home_df[] <- lapply(home_df, function(x) {
if (is.numeric(x)) replace(x, is.infinite(x), NA) else x
})
Group by left and right turning videos, unique IDs
# right turning videos
home_r_df <- home_df[grepl('gait_vertical_right', home_df$task_pose_hv), ]
nrow(home_r_df)
## [1] 33
table(home_r_df$task_pose_hv)
##
## gait_vertical_right
## 33
# left turning videos
home_l_df <- home_df[grepl('gait_vertical_left', home_df$task_pose_hv), ]
nrow(home_l_df)
## [1] 32
table(home_l_df$task_pose_hv)
##
## gait_vertical_left
## 32
# filter to unique IDs - one row per person, select first video
home_uniqueid_df <- home_df %>%
arrange(visit_date_video) %>% # Sort by date
distinct(bw_id, .keep_all = TRUE) # Keep the first occurrence of each ID
nrow(home_uniqueid_df)
## [1] 31
# count number of missing variables in home video data frame
sum(is.na(home_df$delta_pix_h_rel_median_pose_hv))
## [1] 4
sum(is.na(home_df$stride_time_mean_sec_pose_hv))
## [1] 13
sum(is.na(home_df$mean_cadence_step_per_min_pose_hv))
## [1] 12
sum(is.na(home_df$stride_width_mean_cm_pose_hv))
## [1] 12
# stance and all double support measures
sum(is.na(home_df$foot1_gait_cycle_time_mean_pose_hv))
## [1] 34
# ground truth preferred walk zeno mat data
sum(is.na(home_df$PWS_cadencestepsminmean))
## [1] 0
sum(is.na(home_df$bingoEHR_EDSS_measure_value))
## [1] 0
sum(is.na(home_df$msfcEHR_T25FW.SPEED.AVG))
## [1] 0
# demographics
sum(is.na(home_df$demoEHR_Age))
## [1] 0
sum(is.na(home_df$demoEHR_DiseaseDuration))
## [1] 0
sum(is.na(home_df$race_ethnicity_clean))
## [1] 0
sum(is.na(home_df$clean_sex))
## [1] 0
sum(is.na(home_df$ms_dx_condensed))
## [1] 0
PWS, FW, and Home Videos
# preferred walking speed videos
zeno_pws_df$t25fw_log <- log(zeno_pws_df$msfcEHR_T25FW.SPEED.AVG)
ggplot(data = zeno_pws_df, mapping = aes(msfcEHR_T25FW.SPEED.AVG)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = zeno_pws_df, mapping = aes(t25fw_log)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# fast walking speed videos
zeno_fw_df$t25fw_log <- log(zeno_fw_df$msfcEHR_T25FW.SPEED.AVG)
ggplot(data = zeno_fw_df, mapping = aes(msfcEHR_T25FW.SPEED.AVG)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = zeno_fw_df, mapping = aes(t25fw_log)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# home - all videos
home_df$t25fw_log <- log(home_df$msfcEHR_T25FW.SPEED.AVG)
ggplot(data = home_df, mapping = aes(msfcEHR_T25FW.SPEED.AVG)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = home_df, mapping = aes(t25fw_log)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
zeno_pws_df$log_delta_pix_h_rel_median_pose_zv <- log(zeno_pws_df$delta_pix_h_rel_median_pose_zv)
zeno_fw_df$log_delta_pix_h_rel_median_pose_zv <- log(zeno_fw_df$delta_pix_h_rel_median_pose_zv)
home_df$log_delta_pix_h_rel_median_pose_hv <- log(home_df$delta_pix_h_rel_median_pose_hv)
# convert inf to NaN
zeno_pws_df[] <- lapply(zeno_pws_df, function(x) {
if (is.numeric(x)) replace(x, is.infinite(x), NA) else x
})
zeno_fw_df[] <- lapply(zeno_fw_df, function(x) {
if (is.numeric(x)) replace(x, is.infinite(x), NA) else x
})
home_df[] <- lapply(home_df, function(x) {
if (is.numeric(x)) replace(x, is.infinite(x), NA) else x
})
# PWS ------------------------------------------
uni1 <- lm(t25fw_log ~ demoEHR_Age, data = zeno_pws_df)
hist(resid(uni1))
uni2 <- lm(t25fw_log ~ demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(uni2))
uni3 <- lm(t25fw_log ~ ms_dx_condensed, data = zeno_pws_df)
hist(resid(uni3))
uni4 <- lm(t25fw_log ~ race_ethnicity_clean, data = zeno_pws_df)
hist(resid(uni4))
uni5 <- lm(t25fw_log ~ clean_sex, data = zeno_pws_df)
hist(resid(uni5))
## Extract regression results from each model
results <- bind_rows(
tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "demoEHR_Age",
AdjRsquared = summary(uni1)$adj.r.squared),
tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "demoEHR_DiseaseDuration",
AdjRsquared = summary(uni2)$adj.r.squared),
tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "ms_dx_condensed",
AdjRsquared = summary(uni3)$adj.r.squared),
tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "race_ethnicity_clean",
AdjRsquared = summary(uni4)$adj.r.squared),
tidy(uni5, conf.int = TRUE) %>% mutate(Variable = "clean_sex",
AdjRsquared = summary(uni5)$adj.r.squared)) %>%
filter(term != "(Intercept)") # Remove intercept term
adj_rsqu_df <- results[c("Variable","AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 5 x 2
## Variable AdjRsquared
## <chr> <dbl>
## 1 demoEHR_Age 0.09
## 2 demoEHR_DiseaseDuration 0
## 3 ms_dx_condensed 0.22
## 4 race_ethnicity_clean 0.02
## 5 clean_sex 0
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_pws_demographics_univariate_regression_adjR.csv"))
# Plot
title <- "PWS: Log T25FW ~ Demographic/MS Info"
p <- univariate_regression_plot(results, title, 1)
p
ggsave(file.path(output_dir, "t25fw_pws_demographics_univariate_regression_estimates.png"),
bg = "white", width= 10, height=10)
### Univariate video metrics
uni1 <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv, data = zeno_pws_df)
ggplot(data = zeno_pws_df, aes(x = log_delta_pix_h_rel_median_pose_zv, y = t25fw_log)) + geom_point()
## Warning: Removed 16 rows containing missing values (`geom_point()`).
hist(resid(uni1))
uni2 <- lm(t25fw_log ~ stride_time_median_sec_pose_zv, data = zeno_pws_df)
ggplot(data = zeno_pws_df, aes(x = stride_time_median_sec_pose_zv, y = t25fw_log)) + geom_point()
## Warning: Removed 48 rows containing missing values (`geom_point()`).
hist(resid(uni2))
uni3 <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv, data = zeno_pws_df)
ggplot(data = zeno_pws_df, aes(x = mean_cadence_step_per_min_pose_zv, y = t25fw_log)) + geom_point()
## Warning: Removed 40 rows containing missing values (`geom_point()`).
hist(resid(uni3))
uni4 <- lm(t25fw_log ~ stride_width_median_cm_pose_zv, data = zeno_pws_df)
ggplot(data = zeno_pws_df, aes(x = stride_width_median_cm_pose_zv, y = t25fw_log)) + geom_point()
## Warning: Removed 40 rows containing missing values (`geom_point()`).
hist(resid(uni4))
uni_results <- bind_rows(
tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv",
AdjRsquared = summary(uni1)$adj.r.squared),
tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv",
AdjRsquared = summary(uni2)$adj.r.squared),
tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv",
AdjRsquared = summary(uni3)$adj.r.squared),
tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv",
AdjRsquared = summary(uni4)$adj.r.squared)) %>%
filter(term != "(Intercept)") # Remove intercept term
uni_results <- uni_results %>% mutate(Model = "Unadjusted")
# print adjusted R squared
adj_rsqu_df <- uni_results[c("Variable", "AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 4 x 2
## Variable AdjRsquared
## <chr> <dbl>
## 1 log_delta_pix_h_rel_median_pose_zv 0.34
## 2 stride_time_median_sec_pose_zv 0.14
## 3 mean_cadence_step_per_min_pose_zv 0.13
## 4 stride_width_median_cm_pose_zv 0.08
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_pws_metrics_univariate_regression_adjR.csv"))
p <- univariate_regression_plot(uni_results, 'PWS: Log T25FW ~ Metric', 1)
p
ggsave(file.path(output_dir, "t25fw_pws_metrics_univariate_regression_estimates.png"),
bg = "white", width= 10, height=10)
## age ---------------------------------------------------
con1_age <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + demoEHR_Age, data = zeno_pws_df)
hist(resid(con1_age))
con2_age <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + demoEHR_Age, data = zeno_pws_df)
hist(resid(con2_age))
con3_age <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + demoEHR_Age, data = zeno_pws_df)
hist(resid(con3_age))
con4_age <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + demoEHR_Age, data = zeno_pws_df)
hist(resid(con4_age))
con_age_results <- bind_rows(
tidy(con1_age, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_age, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_age, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_age, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("demoEHR_Age") # Define confounders to remove
con_age_results <- con_age_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_age_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ -0.480 0.0474 -10.1 8.55e-20 -0.574 -0.387 log_del~ Adju~
## 2 strid~ 0.878 0.157 5.59 8.81e- 8 0.568 1.19 stride_~ Adju~
## 3 mean_~ -0.00976 0.00175 -5.57 9.28e- 8 -0.0132 -0.00630 mean_ca~ Adju~
## 4 strid~ 0.0312 0.00810 3.85 1.65e- 4 0.0152 0.0471 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_age <- bind_rows(uni_results, con_age_results)
results_age
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ -0.507 0.0491 -10.3 2.23e-20 -0.604 -0.410 log_del~
## 2 stride_time~ 0.866 0.162 5.34 2.92e- 7 0.545 1.19 stride_~
## 3 mean_cadenc~ -0.00976 0.00183 -5.35 2.65e- 7 -0.0134 -0.00616 mean_ca~
## 4 stride_widt~ 0.0337 0.00832 4.05 7.53e- 5 0.0173 0.0501 stride_~
## 5 log_delta_p~ -0.480 0.0474 -10.1 8.55e-20 -0.574 -0.387 log_del~
## 6 stride_time~ 0.878 0.157 5.59 8.81e- 8 0.568 1.19 stride_~
## 7 mean_cadenc~ -0.00976 0.00175 -5.57 9.28e- 8 -0.0132 -0.00630 mean_ca~
## 8 stride_widt~ 0.0312 0.00810 3.85 1.65e- 4 0.0152 0.0471 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_age, 'PWS: Log T25FW ~ Metric + demoEHR_Age', 1)
p
ggsave(file.path(output_dir, "t25fw_pws_metric_regression_adj_age.png"),
bg = "white", width= 10, height=10)
## disease duration -----------------------------------------------------
con1_dur <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(con1_dur))
con2_dur <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(con2_dur))
con3_dur <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(con3_dur))
con4_dur <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(con4_dur))
con_dur_results <- bind_rows(
tidy(con1_dur, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_dur, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_dur, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_dur, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("demoEHR_DiseaseDuration") # Define confounders to remove
con_dur_results
## # A tibble: 8 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ -0.504 0.0492 -10.2 3.87e-20 -6.01e-1 -0.407 log_del~
## 2 demoEHR_Dis~ 0.00324 0.00316 1.03 3.06e- 1 -2.99e-3 0.00946 log_del~
## 3 stride_time~ 0.891 0.159 5.59 8.81e- 8 5.76e-1 1.21 stride_~
## 4 demoEHR_Dis~ 0.00830 0.00298 2.79 5.93e- 3 2.42e-3 0.0142 stride_~
## 5 mean_cadenc~ -0.0101 0.00181 -5.61 7.54e- 8 -1.37e-2 -0.00656 mean_ca~
## 6 demoEHR_Dis~ 0.00812 0.00326 2.49 1.35e- 2 1.70e-3 0.0145 mean_ca~
## 7 stride_widt~ 0.0327 0.00829 3.95 1.13e- 4 1.64e-2 0.0491 stride_~
## 8 demoEHR_Dis~ 0.00573 0.00338 1.69 9.20e- 2 -9.45e-4 0.0124 stride_~
con_dur_results <- con_dur_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_dur_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ -0.504 0.0492 -10.2 3.87e-20 -0.601 -0.407 log_del~ Adju~
## 2 strid~ 0.891 0.159 5.59 8.81e- 8 0.576 1.21 stride_~ Adju~
## 3 mean_~ -0.0101 0.00181 -5.61 7.54e- 8 -0.0137 -0.00656 mean_ca~ Adju~
## 4 strid~ 0.0327 0.00829 3.95 1.13e- 4 0.0164 0.0491 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_dur <- bind_rows(uni_results, con_dur_results)
results_dur
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ -0.507 0.0491 -10.3 2.23e-20 -0.604 -0.410 log_del~
## 2 stride_time~ 0.866 0.162 5.34 2.92e- 7 0.545 1.19 stride_~
## 3 mean_cadenc~ -0.00976 0.00183 -5.35 2.65e- 7 -0.0134 -0.00616 mean_ca~
## 4 stride_widt~ 0.0337 0.00832 4.05 7.53e- 5 0.0173 0.0501 stride_~
## 5 log_delta_p~ -0.504 0.0492 -10.2 3.87e-20 -0.601 -0.407 log_del~
## 6 stride_time~ 0.891 0.159 5.59 8.81e- 8 0.576 1.21 stride_~
## 7 mean_cadenc~ -0.0101 0.00181 -5.61 7.54e- 8 -0.0137 -0.00656 mean_ca~
## 8 stride_widt~ 0.0327 0.00829 3.95 1.13e- 4 0.0164 0.0491 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dur, 'PWS: Log T25FW ~ Metric + demoEHR_DiseaseDuration', 1)
p
ggsave(file.path(output_dir, "t25fw_pws_metric_regression_adj_duration.png"),
bg = "white", width= 10, height=10)
## MS DX ---------------------------------------------------------------
con1_dx <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + ms_dx_condensed, data = zeno_pws_df)
hist(resid(con1_dx))
con2_dx <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + ms_dx_condensed, data = zeno_pws_df)
hist(resid(con2_dx))
con3_dx <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + ms_dx_condensed, data = zeno_pws_df)
hist(resid(con3_dx))
con4_dx <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + ms_dx_condensed, data = zeno_pws_df)
hist(resid(con4_dx))
con_dx_results <- bind_rows(
tidy(con1_dx, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_dx, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_dx, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_dx, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("ms_dx_condensed") # Define confounders to remove
con_dx_results
## # A tibble: 12 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_~ -0.390 0.0509 -7.66 7.17e-13 -0.490 -0.290 log_del~
## 2 ms_dx_cond~ 0.379 0.0692 5.48 1.24e- 7 0.243 0.516 log_del~
## 3 ms_dx_cond~ 0.109 0.246 0.443 6.58e- 1 -0.376 0.594 log_del~
## 4 stride_tim~ 0.725 0.163 4.45 1.54e- 5 0.404 1.05 stride_~
## 5 ms_dx_cond~ 0.274 0.0696 3.94 1.18e- 4 0.137 0.412 stride_~
## 6 ms_dx_cond~ -0.0776 0.188 -0.413 6.80e- 1 -0.448 0.293 stride_~
## 7 mean_caden~ -0.00752 0.00176 -4.27 3.17e- 5 -0.0110 -0.00404 mean_ca~
## 8 ms_dx_cond~ 0.372 0.0708 5.26 4.03e- 7 0.233 0.512 mean_ca~
## 9 ms_dx_cond~ 0.0467 0.200 0.233 8.16e- 1 -0.349 0.442 mean_ca~
## 10 stride_wid~ 0.0214 0.00813 2.63 9.31e- 3 0.00533 0.0374 stride_~
## 11 ms_dx_cond~ 0.390 0.0739 5.28 3.74e- 7 0.244 0.535 stride_~
## 12 ms_dx_cond~ 0.0554 0.206 0.268 7.89e- 1 -0.352 0.463 stride_~
con_dx_results <- con_dx_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_dx_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ -0.390 0.0509 -7.66 7.17e-13 -0.490 -0.290 log_del~ Adju~
## 2 strid~ 0.725 0.163 4.45 1.54e- 5 0.404 1.05 stride_~ Adju~
## 3 mean_~ -0.00752 0.00176 -4.27 3.17e- 5 -0.0110 -0.00404 mean_ca~ Adju~
## 4 strid~ 0.0214 0.00813 2.63 9.31e- 3 0.00533 0.0374 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_dx <- bind_rows(uni_results, con_dx_results)
results_dx
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ -0.507 0.0491 -10.3 2.23e-20 -0.604 -0.410 log_del~
## 2 stride_time~ 0.866 0.162 5.34 2.92e- 7 0.545 1.19 stride_~
## 3 mean_cadenc~ -0.00976 0.00183 -5.35 2.65e- 7 -0.0134 -0.00616 mean_ca~
## 4 stride_widt~ 0.0337 0.00832 4.05 7.53e- 5 0.0173 0.0501 stride_~
## 5 log_delta_p~ -0.390 0.0509 -7.66 7.17e-13 -0.490 -0.290 log_del~
## 6 stride_time~ 0.725 0.163 4.45 1.54e- 5 0.404 1.05 stride_~
## 7 mean_cadenc~ -0.00752 0.00176 -4.27 3.17e- 5 -0.0110 -0.00404 mean_ca~
## 8 stride_widt~ 0.0214 0.00813 2.63 9.31e- 3 0.00533 0.0374 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dx, 'PWS: Log T25FW ~ Metric + MS DX',1)
p
ggsave(file.path(output_dir, "t25fw_pws_metric_regression_adj_MSDX.png"),
bg = "white", width= 10, height=10)
## race_ethnicity -----------------------------------------------------
con1_re <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + race_ethnicity_clean, data = zeno_pws_df)
hist(resid(con1_re))
con2_re <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + race_ethnicity_clean, data = zeno_pws_df)
hist(resid(con2_re))
con3_re <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + race_ethnicity_clean, data = zeno_pws_df)
hist(resid(con3_re))
con4_re <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + race_ethnicity_clean, data = zeno_pws_df)
hist(resid(con4_re))
con_re_results <- bind_rows(
tidy(con1_re, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_re, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_re, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_re, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("race_ethnicity_clean") # Define confounders to remove
con_re_results
## # A tibble: 20 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_~ -0.508 0.0482 -10.5 5.58e-21 -0.603 -0.413 log_del~
## 2 race_ethni~ -0.0273 0.0981 -0.278 7.81e- 1 -0.221 0.166 log_del~
## 3 race_ethni~ 0.344 0.101 3.40 8.00e- 4 0.145 0.543 log_del~
## 4 race_ethni~ 0.0238 0.0881 0.270 7.88e- 1 -0.150 0.198 log_del~
## 5 race_ethni~ -0.0557 0.0926 -0.601 5.48e- 1 -0.238 0.127 log_del~
## 6 stride_tim~ 0.807 0.156 5.19 6.04e- 7 0.500 1.11 stride_~
## 7 race_ethni~ 0.0896 0.0951 0.942 3.48e- 1 -0.0982 0.277 stride_~
## 8 race_ethni~ 0.415 0.0955 4.35 2.38e- 5 0.227 0.603 stride_~
## 9 race_ethni~ -0.0618 0.0793 -0.779 4.37e- 1 -0.218 0.0948 stride_~
## 10 race_ethni~ 0.00641 0.109 0.0589 9.53e- 1 -0.208 0.221 stride_~
## 11 mean_caden~ -0.0101 0.00179 -5.64 6.68e- 8 -0.0136 -0.00655 mean_ca~
## 12 race_ethni~ 0.110 0.107 1.03 3.04e- 1 -0.100 0.320 mean_ca~
## 13 race_ethni~ 0.443 0.106 4.16 4.85e- 5 0.233 0.653 mean_ca~
## 14 race_ethni~ 0.0571 0.0867 0.658 5.11e- 1 -0.114 0.228 mean_ca~
## 15 race_ethni~ -0.0743 0.118 -0.632 5.28e- 1 -0.306 0.158 mean_ca~
## 16 stride_wid~ 0.0298 0.00828 3.60 4.16e- 4 0.0135 0.0461 stride_~
## 17 race_ethni~ 0.0462 0.112 0.412 6.81e- 1 -0.175 0.267 stride_~
## 18 race_ethni~ 0.391 0.113 3.47 6.54e- 4 0.169 0.614 stride_~
## 19 race_ethni~ 0.0592 0.0909 0.651 5.16e- 1 -0.120 0.239 stride_~
## 20 race_ethni~ 0.0643 0.122 0.529 5.97e- 1 -0.175 0.304 stride_~
con_re_results <- con_re_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_re_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ -0.508 0.0482 -10.5 5.58e-21 -0.603 -0.413 log_del~ Adju~
## 2 strid~ 0.807 0.156 5.19 6.04e- 7 0.500 1.11 stride_~ Adju~
## 3 mean_~ -0.0101 0.00179 -5.64 6.68e- 8 -0.0136 -0.00655 mean_ca~ Adju~
## 4 strid~ 0.0298 0.00828 3.60 4.16e- 4 0.0135 0.0461 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_re <- bind_rows(uni_results, con_re_results)
results_re
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ -0.507 0.0491 -10.3 2.23e-20 -0.604 -0.410 log_del~
## 2 stride_time~ 0.866 0.162 5.34 2.92e- 7 0.545 1.19 stride_~
## 3 mean_cadenc~ -0.00976 0.00183 -5.35 2.65e- 7 -0.0134 -0.00616 mean_ca~
## 4 stride_widt~ 0.0337 0.00832 4.05 7.53e- 5 0.0173 0.0501 stride_~
## 5 log_delta_p~ -0.508 0.0482 -10.5 5.58e-21 -0.603 -0.413 log_del~
## 6 stride_time~ 0.807 0.156 5.19 6.04e- 7 0.500 1.11 stride_~
## 7 mean_cadenc~ -0.0101 0.00179 -5.64 6.68e- 8 -0.0136 -0.00655 mean_ca~
## 8 stride_widt~ 0.0298 0.00828 3.60 4.16e- 4 0.0135 0.0461 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_re, 'PWS: Log T25FW ~ Metric + race_ethnicity_clean', 1)
p
ggsave(file.path(output_dir, "t25fw_pws_metric_regression_adj_RaceEthnicity.png"),
bg = "white", width= 10, height=10)
## sex -------------------------------------------------------------------
con1_sex <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + clean_sex, data = zeno_pws_df)
hist(resid(con1_sex))
con2_sex <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + clean_sex, data = zeno_pws_df)
hist(resid(con2_sex))
con3_sex <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + clean_sex, data = zeno_pws_df)
hist(resid(con3_sex))
con4_sex <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + clean_sex, data = zeno_pws_df)
hist(resid(con4_sex))
con_sex_results <- bind_rows(
tidy(con1_sex, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_sex, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_sex, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_sex, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("clean_sex") # Define confounders to remove
con_sex_results
## # A tibble: 12 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_~ -0.505 0.0495 -10.2 4.89e-20 -0.603 -0.408 log_del~
## 2 clean_sexM~ -0.0402 0.0610 -0.659 5.11e- 1 -0.161 0.0801 log_del~
## 3 clean_sexN~ 0.0610 0.372 0.164 8.70e- 1 -0.672 0.794 log_del~
## 4 stride_tim~ 0.877 0.163 5.39 2.26e- 7 0.556 1.20 stride_~
## 5 clean_sexM~ -0.0765 0.0571 -1.34 1.82e- 1 -0.189 0.0362 stride_~
## 6 clean_sexN~ -0.0812 0.331 -0.245 8.07e- 1 -0.735 0.573 stride_~
## 7 mean_caden~ -0.0101 0.00183 -5.51 1.23e- 7 -0.0137 -0.00646 mean_ca~
## 8 clean_sexM~ -0.118 0.0625 -1.88 6.15e- 2 -0.241 0.00573 mean_ca~
## 9 clean_sexN~ -0.0561 0.367 -0.153 8.79e- 1 -0.780 0.668 mean_ca~
## 10 stride_wid~ 0.0348 0.00832 4.18 4.48e- 5 0.0184 0.0512 stride_~
## 11 clean_sexM~ -0.108 0.0645 -1.67 9.69e- 2 -0.235 0.0196 stride_~
## 12 clean_sexN~ -0.160 0.379 -0.421 6.74e- 1 -0.907 0.588 stride_~
con_sex_results <- con_sex_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_sex_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ -0.505 0.0495 -10.2 4.89e-20 -0.603 -0.408 log_del~ Adju~
## 2 strid~ 0.877 0.163 5.39 2.26e- 7 0.556 1.20 stride_~ Adju~
## 3 mean_~ -0.0101 0.00183 -5.51 1.23e- 7 -0.0137 -0.00646 mean_ca~ Adju~
## 4 strid~ 0.0348 0.00832 4.18 4.48e- 5 0.0184 0.0512 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_sex <- bind_rows(uni_results, con_sex_results)
results_sex
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ -0.507 0.0491 -10.3 2.23e-20 -0.604 -0.410 log_del~
## 2 stride_time~ 0.866 0.162 5.34 2.92e- 7 0.545 1.19 stride_~
## 3 mean_cadenc~ -0.00976 0.00183 -5.35 2.65e- 7 -0.0134 -0.00616 mean_ca~
## 4 stride_widt~ 0.0337 0.00832 4.05 7.53e- 5 0.0173 0.0501 stride_~
## 5 log_delta_p~ -0.505 0.0495 -10.2 4.89e-20 -0.603 -0.408 log_del~
## 6 stride_time~ 0.877 0.163 5.39 2.26e- 7 0.556 1.20 stride_~
## 7 mean_cadenc~ -0.0101 0.00183 -5.51 1.23e- 7 -0.0137 -0.00646 mean_ca~
## 8 stride_widt~ 0.0348 0.00832 4.18 4.48e- 5 0.0184 0.0512 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_sex, 'PWS: Log T25FW ~ Metric + Sex', 1)
p
ggsave(file.path(output_dir, "t25fw_pws_metric_regression_adj_sex.png"),
bg = "white", width= 10, height=10)
Metrics only - not including double support/stance measures, too many missing. May include after improving code
# PWS
# Unadjusted
all_unadj <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv +
stride_time_median_sec_pose_zv +
mean_cadence_step_per_min_pose_zv +
stride_width_median_cm_pose_zv,
data = zeno_pws_df)
summary(all_unadj)
##
## Call:
## lm(formula = t25fw_log ~ log_delta_pix_h_rel_median_pose_zv +
## stride_time_median_sec_pose_zv + mean_cadence_step_per_min_pose_zv +
## stride_width_median_cm_pose_zv, data = zeno_pws_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.80025 -0.20246 -0.04435 0.14522 1.24370
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.779483 0.507896 1.535 0.126863
## log_delta_pix_h_rel_median_pose_zv -0.192139 0.055311 -3.474 0.000663 ***
## stride_time_median_sec_pose_zv 0.458203 0.232284 1.973 0.050298 .
## mean_cadence_step_per_min_pose_zv -0.002736 0.002682 -1.020 0.309220
## stride_width_median_cm_pose_zv 0.026049 0.007828 3.328 0.001091 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3099 on 157 degrees of freedom
## (62 observations deleted due to missingness)
## Multiple R-squared: 0.2909, Adjusted R-squared: 0.2729
## F-statistic: 16.1 on 4 and 157 DF, p-value: 4.537e-11
hist(resid(all_unadj))
# Adjusted
all_adjusted <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv +
stride_time_median_sec_pose_zv +
mean_cadence_step_per_min_pose_zv +
stride_width_median_cm_pose_zv +
demoEHR_Age +
demoEHR_DiseaseDuration +
ms_dx_condensed +
race_ethnicity_clean +
clean_sex,
data = zeno_pws_df)
summary(all_adjusted)
##
## Call:
## lm(formula = t25fw_log ~ log_delta_pix_h_rel_median_pose_zv +
## stride_time_median_sec_pose_zv + mean_cadence_step_per_min_pose_zv +
## stride_width_median_cm_pose_zv + demoEHR_Age + demoEHR_DiseaseDuration +
## ms_dx_condensed + race_ethnicity_clean + clean_sex, data = zeno_pws_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6386 -0.1679 -0.0270 0.1233 1.1528
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 1.021378 0.494099 2.067
## log_delta_pix_h_rel_median_pose_zv -0.159210 0.052207 -3.050
## stride_time_median_sec_pose_zv 0.272391 0.216581 1.258
## mean_cadence_step_per_min_pose_zv -0.004656 0.002513 -1.853
## stride_width_median_cm_pose_zv 0.012098 0.007641 1.583
## demoEHR_Age 0.006144 0.002333 2.633
## demoEHR_DiseaseDuration 0.002141 0.003254 0.658
## ms_dx_condensedProgressive MS 0.186514 0.075461 2.472
## ms_dx_condensedMS, Subtype Not Specified 0.004998 0.203712 0.025
## race_ethnicity_cleanAsian 0.128675 0.097169 1.324
## race_ethnicity_cleanBlack Or African American 0.442252 0.087473 5.056
## race_ethnicity_cleanHispanic or Latino 0.073267 0.077700 0.943
## race_ethnicity_cleanOther/Unknown/Declined 0.077839 0.103577 0.752
## clean_sexMale -0.074394 0.053747 -1.384
## clean_sexNon-Binary 0.108748 0.282705 0.385
## Pr(>|t|)
## (Intercept) 0.04047 *
## log_delta_pix_h_rel_median_pose_zv 0.00272 **
## stride_time_median_sec_pose_zv 0.21050
## mean_cadence_step_per_min_pose_zv 0.06591 .
## stride_width_median_cm_pose_zv 0.11552
## demoEHR_Age 0.00937 **
## demoEHR_DiseaseDuration 0.51157
## ms_dx_condensedProgressive MS 0.01459 *
## ms_dx_condensedMS, Subtype Not Specified 0.98046
## race_ethnicity_cleanAsian 0.18748
## race_ethnicity_cleanBlack Or African American 1.26e-06 ***
## race_ethnicity_cleanHispanic or Latino 0.34725
## race_ethnicity_cleanOther/Unknown/Declined 0.45355
## clean_sexMale 0.16841
## clean_sexNon-Binary 0.70104
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2784 on 147 degrees of freedom
## (62 observations deleted due to missingness)
## Multiple R-squared: 0.4642, Adjusted R-squared: 0.4131
## F-statistic: 9.096 on 14 and 147 DF, p-value: 3.854e-14
hist(resid(all_adjusted))
# format for plotting
# Unadjusted
unadj_all_results <- tidy(all_unadj, conf.int = TRUE) %>%
mutate(Model = "Unadjusted",
AdjRsquared = summary(all_unadj)$adj.r.squared)
# Adjusted
adj_all_results <- tidy(all_adjusted, conf.int = TRUE) %>%
mutate(Model = "Adjusted",
AdjRsquared = summary(all_adjusted)$adj.r.squared)
# combine and plot
multivar_results <- bind_rows(unadj_all_results, adj_all_results) %>%
filter(term != "(Intercept)") %>% # Remove intercept term
mutate(priority = ifelse(grepl("zv", term), 1, 2)) %>% # assign priority to video metrics
arrange(priority) %>% # sort by priority
select(-priority) # drop priority colum
adj_rsqu_df <- multivar_results[c("Model", "AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 2 x 2
## Model AdjRsquared
## <chr> <dbl>
## 1 Unadjusted 0.27
## 2 Adjusted 0.41
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_pws_allMetrics_unadjusted_vs_adjusted_adjR.csv"))
p <- adj_vs_unadj_regression_plot(multivar_results, 'PWS: Log T25FW Multivariate - Unadjusted vs Adjusted', 1)
p
ggsave(file.path(output_dir, "t25fw_pws_allMetrics_unadjusted_vs_adjusted.png"),
bg = "white", width= 10, height=10)
# FW ------------------------------------------
uni1 <- lm(t25fw_log ~ demoEHR_Age, data = zeno_fw_df)
hist(resid(uni1))
uni2 <- lm(t25fw_log ~ demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(uni2))
uni3 <- lm(t25fw_log ~ ms_dx_condensed, data = zeno_fw_df)
hist(resid(uni3))
uni4 <- lm(t25fw_log ~ race_ethnicity_clean, data = zeno_fw_df)
hist(resid(uni4))
uni5 <- lm(t25fw_log ~ clean_sex, data = zeno_fw_df)
hist(resid(uni4))
## Extract regression results from each model
results <- bind_rows(
tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "demoEHR_Age",
AdjRsquared = summary(uni1)$adj.r.squared),
tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "demoEHR_DiseaseDuration",
AdjRsquared = summary(uni2)$adj.r.squared),
tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "ms_dx_condensed",
AdjRsquared = summary(uni3)$adj.r.squared),
tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "race_ethnicity_clean",
AdjRsquared = summary(uni4)$adj.r.squared),
tidy(uni5, conf.int = TRUE) %>% mutate(Variable = "clean_sex",
AdjRsquared = summary(uni5)$adj.r.squared)) %>%
filter(term != "(Intercept)") # Remove intercept term
adj_rsqu_df <- results[c("Variable","AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 5 x 2
## Variable AdjRsquared
## <chr> <dbl>
## 1 demoEHR_Age 0.09
## 2 demoEHR_DiseaseDuration 0
## 3 ms_dx_condensed 0.22
## 4 race_ethnicity_clean 0.02
## 5 clean_sex 0
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_fw_demographics_univariate_regression_adjR.csv"))
# Plot
title <- "FW: Log T25FW ~ Demographic/MS Info"
p <- univariate_regression_plot(results, title, 1)
p
ggsave(file.path(output_dir, "t25fw_fw_demographics_univariate_regression_estimates.png"),
bg = "white", width= 10, height=10)
### Univariate video metrics
uni1 <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv, data = zeno_fw_df)
ggplot(data = zeno_fw_df, aes(x = log_delta_pix_h_rel_median_pose_zv, y = t25fw_log)) + geom_point()
## Warning: Removed 4 rows containing missing values (`geom_point()`).
hist(resid(uni1))
uni2 <- lm(t25fw_log ~ stride_time_median_sec_pose_zv, data = zeno_fw_df)
ggplot(data = zeno_fw_df, aes(x = stride_time_median_sec_pose_zv, y = t25fw_log)) + geom_point()
## Warning: Removed 53 rows containing missing values (`geom_point()`).
hist(resid(uni2))
uni3 <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv, data = zeno_fw_df)
ggplot(data = zeno_fw_df, aes(x = mean_cadence_step_per_min_pose_zv, y = t25fw_log)) + geom_point()
## Warning: Removed 46 rows containing missing values (`geom_point()`).
hist(resid(uni3))
uni4 <- lm(t25fw_log ~ stride_width_median_cm_pose_zv, data = zeno_fw_df)
ggplot(data = zeno_fw_df, aes(x = stride_width_median_cm_pose_zv, y = t25fw_log)) + geom_point()
## Warning: Removed 47 rows containing missing values (`geom_point()`).
hist(resid(uni4))
uni_results <- bind_rows(
tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv",
AdjRsquared = summary(uni1)$adj.r.squared),
tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv",
AdjRsquared = summary(uni2)$adj.r.squared),
tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv",
AdjRsquared = summary(uni3)$adj.r.squared),
tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv",
AdjRsquared = summary(uni4)$adj.r.squared)) %>%
filter(term != "(Intercept)") # Remove intercept term
uni_results <- uni_results %>% mutate(Model = "Unadjusted")
adj_rsqu_df <- uni_results[c("Variable", "AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 4 x 2
## Variable AdjRsquared
## <chr> <dbl>
## 1 log_delta_pix_h_rel_median_pose_zv 0.44
## 2 stride_time_median_sec_pose_zv 0.34
## 3 mean_cadence_step_per_min_pose_zv 0.28
## 4 stride_width_median_cm_pose_zv 0.02
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_fw_metrics_univariate_regression_adjR.csv"))
p <- univariate_regression_plot(uni_results, 'FW: Log T25FW ~ Metric', 1)
p
ggsave(file.path(output_dir, "t25fw_fw_metrics_univariate_regression_estimates.png"),
bg = "white", width= 10, height=10)
## age ---------------------------------------------------
con1_age <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + demoEHR_Age, data = zeno_fw_df)
hist(resid(con1_age))
con2_age <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + demoEHR_Age, data = zeno_fw_df)
hist(resid(con2_age))
con3_age <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + demoEHR_Age, data = zeno_fw_df)
hist(resid(con3_age))
con4_age <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + demoEHR_Age, data = zeno_fw_df)
hist(resid(con4_age))
con_age_results <- bind_rows(
tidy(con1_age, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_age, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_age, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_age, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("demoEHR_Age") # Define confounders to remove
con_age_results <- con_age_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_age_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ -0.507 0.0413 -12.3 1.46e-26 -0.588 -0.425 log_del~ Adju~
## 2 strid~ 1.44 0.151 9.54 1.85e-17 1.14 1.74 stride_~ Adju~
## 3 mean_~ -0.0108 0.00128 -8.41 1.50e-14 -0.0133 -0.00823 mean_ca~ Adju~
## 4 strid~ 0.0264 0.0117 2.25 2.56e- 2 0.00325 0.0495 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_age <- bind_rows(uni_results, con_age_results)
results_age
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ -0.538 0.0413 -13.0 5.54e-29 -0.619 -0.456 log_del~
## 2 stride_time~ 1.47 0.156 9.40 4.06e-17 1.16 1.77 stride_~
## 3 mean_cadenc~ -0.0111 0.00133 -8.36 1.89e-14 -0.0138 -0.00851 mean_ca~
## 4 stride_widt~ 0.0284 0.0122 2.33 2.07e- 2 0.00439 0.0524 stride_~
## 5 log_delta_p~ -0.507 0.0413 -12.3 1.46e-26 -0.588 -0.425 log_del~
## 6 stride_time~ 1.44 0.151 9.54 1.85e-17 1.14 1.74 stride_~
## 7 mean_cadenc~ -0.0108 0.00128 -8.41 1.50e-14 -0.0133 -0.00823 mean_ca~
## 8 stride_widt~ 0.0264 0.0117 2.25 2.56e- 2 0.00325 0.0495 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_age, 'FW: Log T25FW ~ Metric + demoEHR_Age', 1)
p
ggsave(file.path(output_dir, "t25fw_fw_metric_regression_adj_age.png"),
bg = "white", width= 10, height=10)
## disease duration -----------------------------------------------------
con1_dur <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(con1_dur))
con2_dur <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(con2_dur))
con3_dur <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(con3_dur))
con4_dur <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(con4_dur))
con_dur_results <- bind_rows(
tidy(con1_dur, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_dur, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_dur, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_dur, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("demoEHR_DiseaseDuration") # Define confounders to remove
con_dur_results
## # A tibble: 8 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ -0.536 0.0415 -12.9 1.33e-28 -6.18e-1 -0.454 log_del~
## 2 demoEHR_Dis~ 0.00119 0.00283 0.420 6.75e- 1 -4.39e-3 0.00677 log_del~
## 3 stride_time~ 1.49 0.153 9.69 7.06e-18 1.18e+0 1.79 stride_~
## 4 demoEHR_Dis~ 0.00841 0.00316 2.66 8.47e- 3 2.18e-3 0.0146 stride_~
## 5 mean_cadenc~ -0.0113 0.00132 -8.50 8.32e-15 -1.39e-2 -0.00865 mean_ca~
## 6 demoEHR_Dis~ 0.00660 0.00364 1.81 7.17e- 2 -5.89e-4 0.0138 mean_ca~
## 7 stride_widt~ 0.0271 0.0123 2.21 2.84e- 2 2.89e-3 0.0513 stride_~
## 8 demoEHR_Dis~ 0.00405 0.00431 0.941 3.48e- 1 -4.45e-3 0.0126 stride_~
con_dur_results <- con_dur_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_dur_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ -0.536 0.0415 -12.9 1.33e-28 -0.618 -0.454 log_del~ Adju~
## 2 strid~ 1.49 0.153 9.69 7.06e-18 1.18 1.79 stride_~ Adju~
## 3 mean_~ -0.0113 0.00132 -8.50 8.32e-15 -0.0139 -0.00865 mean_ca~ Adju~
## 4 strid~ 0.0271 0.0123 2.21 2.84e- 2 0.00289 0.0513 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_dur <- bind_rows(uni_results, con_dur_results)
results_dur
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ -0.538 0.0413 -13.0 5.54e-29 -0.619 -0.456 log_del~
## 2 stride_time~ 1.47 0.156 9.40 4.06e-17 1.16 1.77 stride_~
## 3 mean_cadenc~ -0.0111 0.00133 -8.36 1.89e-14 -0.0138 -0.00851 mean_ca~
## 4 stride_widt~ 0.0284 0.0122 2.33 2.07e- 2 0.00439 0.0524 stride_~
## 5 log_delta_p~ -0.536 0.0415 -12.9 1.33e-28 -0.618 -0.454 log_del~
## 6 stride_time~ 1.49 0.153 9.69 7.06e-18 1.18 1.79 stride_~
## 7 mean_cadenc~ -0.0113 0.00132 -8.50 8.32e-15 -0.0139 -0.00865 mean_ca~
## 8 stride_widt~ 0.0271 0.0123 2.21 2.84e- 2 0.00289 0.0513 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dur, 'FW: Log T25FW ~ Metric + demoEHR_DiseaseDuration', 1)
p
ggsave(file.path(output_dir, "t25fw_fw_metric_regression_adj_duration.png"),
bg = "white", width= 10, height=10)
## MS DX ---------------------------------------------------------------
con1_dx <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + ms_dx_condensed, data = zeno_fw_df)
hist(resid(con1_dx))
con2_dx <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + ms_dx_condensed, data = zeno_fw_df)
hist(resid(con2_dx))
con3_dx <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + ms_dx_condensed, data = zeno_fw_df)
hist(resid(con3_dx))
con4_dx <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + ms_dx_condensed, data = zeno_fw_df)
hist(resid(con4_dx))
con_dx_results <- bind_rows(
tidy(con1_dx, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_dx, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_dx, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_dx, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("ms_dx_condensed") # Define confounders to remove
con_dx_results
## # A tibble: 12 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_~ -0.459 0.0421 -10.9 2.56e-22 -0.542 -0.376 log_del~
## 2 ms_dx_cond~ 0.307 0.0599 5.13 6.53e- 7 0.189 0.426 log_del~
## 3 ms_dx_cond~ 0.248 0.316 0.786 4.33e- 1 -0.375 0.872 log_del~
## 4 stride_tim~ 1.27 0.166 7.64 1.64e-12 0.938 1.59 stride_~
## 5 ms_dx_cond~ 0.242 0.0779 3.11 2.18e- 3 0.0887 0.396 stride_~
## 6 ms_dx_cond~ 0.0237 0.236 0.101 9.20e- 1 -0.441 0.489 stride_~
## 7 mean_caden~ -0.00889 0.00133 -6.71 2.69e-10 -0.0115 -0.00628 mean_ca~
## 8 ms_dx_cond~ 0.408 0.0799 5.11 8.55e- 7 0.251 0.566 mean_ca~
## 9 ms_dx_cond~ 0.0731 0.260 0.281 7.79e- 1 -0.441 0.587 mean_ca~
## 10 stride_wid~ 0.0179 0.0111 1.61 1.08e- 1 -0.00398 0.0397 stride_~
## 11 ms_dx_cond~ 0.570 0.0849 6.71 2.80e-10 0.402 0.737 stride_~
## 12 ms_dx_cond~ -0.0381 0.292 -0.131 8.96e- 1 -0.614 0.538 stride_~
con_dx_results <- con_dx_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_dx_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ -0.459 0.0421 -10.9 2.56e-22 -0.542 -0.376 log_del~ Adju~
## 2 strid~ 1.27 0.166 7.64 1.64e-12 0.938 1.59 stride_~ Adju~
## 3 mean_~ -0.00889 0.00133 -6.71 2.69e-10 -0.0115 -0.00628 mean_ca~ Adju~
## 4 strid~ 0.0179 0.0111 1.61 1.08e- 1 -0.00398 0.0397 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_dx <- bind_rows(uni_results, con_dx_results)
results_dx
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ -0.538 0.0413 -13.0 5.54e-29 -0.619 -0.456 log_del~
## 2 stride_time~ 1.47 0.156 9.40 4.06e-17 1.16 1.77 stride_~
## 3 mean_cadenc~ -0.0111 0.00133 -8.36 1.89e-14 -0.0138 -0.00851 mean_ca~
## 4 stride_widt~ 0.0284 0.0122 2.33 2.07e- 2 0.00439 0.0524 stride_~
## 5 log_delta_p~ -0.459 0.0421 -10.9 2.56e-22 -0.542 -0.376 log_del~
## 6 stride_time~ 1.27 0.166 7.64 1.64e-12 0.938 1.59 stride_~
## 7 mean_cadenc~ -0.00889 0.00133 -6.71 2.69e-10 -0.0115 -0.00628 mean_ca~
## 8 stride_widt~ 0.0179 0.0111 1.61 1.08e- 1 -0.00398 0.0397 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dx, 'FW: Log T25FW ~ Metric + MS DX', 1)
p
ggsave(file.path(output_dir, "t25fw_fw_metric_regression_adj_MSDX.png"),
bg = "white", width= 10, height=10)
## race_ethnicity -----------------------------------------------------
con1_re <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + race_ethnicity_clean, data = zeno_fw_df)
hist(resid(con1_re))
con2_re <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + race_ethnicity_clean, data = zeno_fw_df)
hist(resid(con2_re))
con3_re <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + race_ethnicity_clean, data = zeno_fw_df)
hist(resid(con3_re))
con4_re <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + race_ethnicity_clean, data = zeno_fw_df)
hist(resid(con4_re))
con_re_results <- bind_rows(
tidy(con1_re, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_re, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_re, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_re, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("race_ethnicity_clean") # Define confounders to remove
con_re_results
## # A tibble: 20 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_~ -5.28e-1 0.0416 -12.7 7.12e-28 -0.610 -0.446 log_del~
## 2 race_ethni~ 2.73e-2 0.0871 0.314 7.54e- 1 -0.144 0.199 log_del~
## 3 race_ethni~ 1.88e-1 0.0933 2.02 4.49e- 2 0.00430 0.372 log_del~
## 4 race_ethni~ -8.05e-2 0.0809 -0.995 3.21e- 1 -0.240 0.0789 log_del~
## 5 race_ethni~ -5.02e-2 0.0826 -0.607 5.44e- 1 -0.213 0.113 log_del~
## 6 stride_tim~ 1.39e+0 0.155 8.95 7.51e-16 1.08 1.70 stride_~
## 7 race_ethni~ 2.07e-2 0.0944 0.219 8.27e- 1 -0.166 0.207 stride_~
## 8 race_ethni~ 3.22e-1 0.102 3.16 1.90e- 3 0.120 0.523 stride_~
## 9 race_ethni~ 5.55e-2 0.0886 0.626 5.32e- 1 -0.119 0.231 stride_~
## 10 race_ethni~ -2.30e-2 0.101 -0.228 8.20e- 1 -0.222 0.176 stride_~
## 11 mean_caden~ -1.09e-2 0.00132 -8.23 4.83e-14 -0.0135 -0.00825 mean_ca~
## 12 race_ethni~ 6.65e-4 0.108 0.00613 9.95e- 1 -0.213 0.215 mean_ca~
## 13 race_ethni~ 3.55e-1 0.116 3.06 2.61e- 3 0.126 0.585 mean_ca~
## 14 race_ethni~ 3.70e-2 0.0995 0.372 7.10e- 1 -0.159 0.233 mean_ca~
## 15 race_ethni~ -4.03e-2 0.116 -0.347 7.29e- 1 -0.270 0.189 mean_ca~
## 16 stride_wid~ 2.10e-2 0.0126 1.67 9.64e- 2 -0.00380 0.0459 stride_~
## 17 race_ethni~ 2.66e-2 0.129 0.206 8.37e- 1 -0.228 0.281 stride_~
## 18 race_ethni~ 3.81e-1 0.139 2.74 6.72e- 3 0.107 0.654 stride_~
## 19 race_ethni~ 7.18e-3 0.117 0.0615 9.51e- 1 -0.223 0.237 stride_~
## 20 race_ethni~ -4.67e-2 0.138 -0.339 7.35e- 1 -0.318 0.225 stride_~
con_re_results <- con_re_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_re_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ -0.528 0.0416 -12.7 7.12e-28 -0.610 -0.446 log_del~ Adju~
## 2 strid~ 1.39 0.155 8.95 7.51e-16 1.08 1.70 stride_~ Adju~
## 3 mean_~ -0.0109 0.00132 -8.23 4.83e-14 -0.0135 -0.00825 mean_ca~ Adju~
## 4 strid~ 0.0210 0.0126 1.67 9.64e- 2 -0.00380 0.0459 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_re <- bind_rows(uni_results, con_re_results)
results_re
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ -0.538 0.0413 -13.0 5.54e-29 -0.619 -0.456 log_del~
## 2 stride_time~ 1.47 0.156 9.40 4.06e-17 1.16 1.77 stride_~
## 3 mean_cadenc~ -0.0111 0.00133 -8.36 1.89e-14 -0.0138 -0.00851 mean_ca~
## 4 stride_widt~ 0.0284 0.0122 2.33 2.07e- 2 0.00439 0.0524 stride_~
## 5 log_delta_p~ -0.528 0.0416 -12.7 7.12e-28 -0.610 -0.446 log_del~
## 6 stride_time~ 1.39 0.155 8.95 7.51e-16 1.08 1.70 stride_~
## 7 mean_cadenc~ -0.0109 0.00132 -8.23 4.83e-14 -0.0135 -0.00825 mean_ca~
## 8 stride_widt~ 0.0210 0.0126 1.67 9.64e- 2 -0.00380 0.0459 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_re, 'FW: Log T25FW ~ Metric + race_ethnicity_clean', 1)
p
ggsave(file.path(output_dir, "t25fw_fw_metric_regression_adj_RaceEthnicity.png"),
bg = "white", width= 10, height=10)
## sex -------------------------------------------------------------------
con1_sex <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv + clean_sex, data = zeno_fw_df)
hist(resid(con1_sex))
con2_sex <- lm(t25fw_log ~ stride_time_median_sec_pose_zv + clean_sex, data = zeno_fw_df)
hist(resid(con2_sex))
con3_sex <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_zv + clean_sex, data = zeno_fw_df)
hist(resid(con3_sex))
con4_sex <- lm(t25fw_log ~ stride_width_median_cm_pose_zv + clean_sex, data = zeno_fw_df)
hist(resid(con4_sex))
con_sex_results <- bind_rows(
tidy(con1_sex, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_sex, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_sex, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_sex, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("clean_sex") # Define confounders to remove
con_sex_results
## # A tibble: 12 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_~ -0.537 0.0416 -12.9 1.44e-28 -0.619 -0.455 log_del~
## 2 clean_sexM~ -0.0266 0.0540 -0.492 6.23e- 1 -0.133 0.0799 log_del~
## 3 clean_sexN~ 0.0274 0.238 0.115 9.08e- 1 -0.442 0.497 log_del~
## 4 stride_tim~ 1.47 0.156 9.44 3.51e-17 1.16 1.78 stride_~
## 5 clean_sexM~ -0.0873 0.0599 -1.46 1.47e- 1 -0.205 0.0309 stride_~
## 6 clean_sexN~ -0.0828 0.241 -0.344 7.32e- 1 -0.559 0.393 stride_~
## 7 mean_caden~ -0.0112 0.00133 -8.41 1.52e-14 -0.0138 -0.00856 mean_ca~
## 8 clean_sexM~ -0.116 0.0684 -1.70 9.12e- 2 -0.251 0.0188 mean_ca~
## 9 clean_sexN~ 0.00268 0.277 0.00966 9.92e- 1 -0.545 0.550 mean_ca~
## 10 stride_wid~ 0.0310 0.0122 2.53 1.21e- 2 0.00687 0.0552 stride_~
## 11 clean_sexM~ -0.129 0.0805 -1.60 1.12e- 1 -0.288 0.0303 stride_~
## 12 clean_sexN~ -0.153 0.324 -0.472 6.38e- 1 -0.792 0.486 stride_~
con_sex_results <- con_sex_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_sex_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ -0.537 0.0416 -12.9 1.44e-28 -0.619 -0.455 log_del~ Adju~
## 2 strid~ 1.47 0.156 9.44 3.51e-17 1.16 1.78 stride_~ Adju~
## 3 mean_~ -0.0112 0.00133 -8.41 1.52e-14 -0.0138 -0.00856 mean_ca~ Adju~
## 4 strid~ 0.0310 0.0122 2.53 1.21e- 2 0.00687 0.0552 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_sex <- bind_rows(uni_results, con_sex_results)
results_sex
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ -0.538 0.0413 -13.0 5.54e-29 -0.619 -0.456 log_del~
## 2 stride_time~ 1.47 0.156 9.40 4.06e-17 1.16 1.77 stride_~
## 3 mean_cadenc~ -0.0111 0.00133 -8.36 1.89e-14 -0.0138 -0.00851 mean_ca~
## 4 stride_widt~ 0.0284 0.0122 2.33 2.07e- 2 0.00439 0.0524 stride_~
## 5 log_delta_p~ -0.537 0.0416 -12.9 1.44e-28 -0.619 -0.455 log_del~
## 6 stride_time~ 1.47 0.156 9.44 3.51e-17 1.16 1.78 stride_~
## 7 mean_cadenc~ -0.0112 0.00133 -8.41 1.52e-14 -0.0138 -0.00856 mean_ca~
## 8 stride_widt~ 0.0310 0.0122 2.53 1.21e- 2 0.00687 0.0552 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_sex, 'FW: Log T25FW ~ Metric + Sex', 1)
p
ggsave(file.path(output_dir, "t25fw_fw_metric_regression_adj_sex.png"),
bg = "white", width= 10, height=10)
Metrics only - not including double support/stance measures, too many missing. May include after improving code
# FW
# Unadjusted
all_unadj <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv +
stride_time_median_sec_pose_zv +
mean_cadence_step_per_min_pose_zv +
stride_width_median_cm_pose_zv,
data = zeno_fw_df)
summary(all_unadj)
##
## Call:
## lm(formula = t25fw_log ~ log_delta_pix_h_rel_median_pose_zv +
## stride_time_median_sec_pose_zv + mean_cadence_step_per_min_pose_zv +
## stride_width_median_cm_pose_zv, data = zeno_fw_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7376 -0.1609 -0.0209 0.1198 1.4365
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.286575 0.379772 3.388 0.000885 ***
## log_delta_pix_h_rel_median_pose_zv -0.362830 0.050788 -7.144 2.92e-11 ***
## stride_time_median_sec_pose_zv 0.521656 0.206156 2.530 0.012347 *
## mean_cadence_step_per_min_pose_zv -0.004323 0.001743 -2.480 0.014167 *
## stride_width_median_cm_pose_zv -0.001463 0.008556 -0.171 0.864450
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2878 on 162 degrees of freedom
## (55 observations deleted due to missingness)
## Multiple R-squared: 0.5403, Adjusted R-squared: 0.5289
## F-statistic: 47.6 on 4 and 162 DF, p-value: < 2.2e-16
hist(resid(all_unadj))
# Adjusted
all_adjusted <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_zv +
stride_time_median_sec_pose_zv +
mean_cadence_step_per_min_pose_zv +
stride_width_median_cm_pose_zv +
demoEHR_Age +
demoEHR_DiseaseDuration +
ms_dx_condensed +
race_ethnicity_clean +
clean_sex,
data = zeno_fw_df)
summary(all_adjusted)
##
## Call:
## lm(formula = t25fw_log ~ log_delta_pix_h_rel_median_pose_zv +
## stride_time_median_sec_pose_zv + mean_cadence_step_per_min_pose_zv +
## stride_width_median_cm_pose_zv + demoEHR_Age + demoEHR_DiseaseDuration +
## ms_dx_condensed + race_ethnicity_clean + clean_sex, data = zeno_fw_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.62995 -0.14801 -0.00986 0.13401 1.22547
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 1.2167389 0.3792645 3.208
## log_delta_pix_h_rel_median_pose_zv -0.3157477 0.0498813 -6.330
## stride_time_median_sec_pose_zv 0.3987015 0.1985435 2.008
## mean_cadence_step_per_min_pose_zv -0.0044988 0.0016791 -2.679
## stride_width_median_cm_pose_zv -0.0086240 0.0085587 -1.008
## demoEHR_Age 0.0056733 0.0021890 2.592
## demoEHR_DiseaseDuration 0.0009481 0.0031031 0.306
## ms_dx_condensedProgressive MS 0.1349804 0.0697888 1.934
## ms_dx_condensedMS, Subtype Not Specified 0.3094267 0.2790501 1.109
## race_ethnicity_cleanAsian 0.1381439 0.0844554 1.636
## race_ethnicity_cleanBlack Or African American 0.3244756 0.0856144 3.790
## race_ethnicity_cleanHispanic or Latino 0.0724510 0.0764232 0.948
## race_ethnicity_cleanOther/Unknown/Declined 0.0123925 0.0883343 0.140
## clean_sexMale -0.0632811 0.0512554 -1.235
## clean_sexNon-Binary 0.1288287 0.1959085 0.658
## Pr(>|t|)
## (Intercept) 0.001629 **
## log_delta_pix_h_rel_median_pose_zv 2.62e-09 ***
## stride_time_median_sec_pose_zv 0.046401 *
## mean_cadence_step_per_min_pose_zv 0.008191 **
## stride_width_median_cm_pose_zv 0.315234
## demoEHR_Age 0.010481 *
## demoEHR_DiseaseDuration 0.760375
## ms_dx_condensedProgressive MS 0.054955 .
## ms_dx_condensedMS, Subtype Not Specified 0.269243
## race_ethnicity_cleanAsian 0.103971
## race_ethnicity_cleanBlack Or African American 0.000217 ***
## race_ethnicity_cleanHispanic or Latino 0.344623
## race_ethnicity_cleanOther/Unknown/Declined 0.888616
## clean_sexMale 0.218876
## clean_sexNon-Binary 0.511792
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.269 on 152 degrees of freedom
## (55 observations deleted due to missingness)
## Multiple R-squared: 0.623, Adjusted R-squared: 0.5883
## F-statistic: 17.94 on 14 and 152 DF, p-value: < 2.2e-16
hist(resid(all_adjusted))
# format for plotting
# Unadjusted
unadj_all_results <- tidy(all_unadj, conf.int = TRUE) %>%
mutate(Model = "Unadjusted",
AdjRsquared = summary(all_unadj)$adj.r.squared)
# Adjusted
adj_all_results <- tidy(all_adjusted, conf.int = TRUE) %>%
mutate(Model = "Adjusted",
AdjRsquared = summary(all_adjusted)$adj.r.squared)
# combine and plot
multivar_results <- bind_rows(unadj_all_results, adj_all_results) %>%
filter(term != "(Intercept)") %>% # Remove intercept term
mutate(priority = ifelse(grepl("zv", term), 1, 2)) %>% # assign priority to video metrics
arrange(priority) %>% # sort by priority
select(-priority) # drop priority colum
adj_rsqu_df <- multivar_results[c("Model", "AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 2 x 2
## Model AdjRsquared
## <chr> <dbl>
## 1 Unadjusted 0.53
## 2 Adjusted 0.59
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_fw_allMetrics_unadjusted_vs_adjusted_adjR.csv"))
p <- adj_vs_unadj_regression_plot(multivar_results, 'FW: Log T25FW Multivariate - Unadjusted vs Adjusted', 1)
p
ggsave(file.path(output_dir, "t25fw_fw_allMetrics_unadjusted_vs_adjusted.png"),
bg = "white", width= 10, height=10)
uni1 <- lm(t25fw_log ~ demoEHR_Age, data = home_df)
hist(resid(uni1))
uni2 <- lm(t25fw_log ~ demoEHR_DiseaseDuration, data = home_df)
hist(resid(uni2))
uni3 <- lm(t25fw_log ~ ms_dx_condensed, data = home_df)
hist(resid(uni3))
uni4 <- lm(t25fw_log ~ race_ethnicity_clean, data = home_df)
hist(resid(uni4))
uni5 <- lm(t25fw_log ~ clean_sex, data = home_df)
hist(resid(uni4))
## Extract regression results from each model
results <- bind_rows(
tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "demoEHR_Age",
AdjRsquared = summary(uni1)$adj.r.squared),
tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "demoEHR_DiseaseDuration",
AdjRsquared = summary(uni2)$adj.r.squared),
tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "ms_dx_condensed",
AdjRsquared = summary(uni3)$adj.r.squared),
tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "race_ethnicity_clean",
AdjRsquared = summary(uni4)$adj.r.squared),
tidy(uni5, conf.int = TRUE) %>% mutate(Variable = "clean_sex",
AdjRsquared = summary(uni5)$adj.r.squared)) %>%
filter(term != "(Intercept)") # Remove intercept term
adj_rsqu_df <- results[c("Variable", "AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 5 x 2
## Variable AdjRsquared
## <chr> <dbl>
## 1 demoEHR_Age 0.11
## 2 demoEHR_DiseaseDuration 0.03
## 3 ms_dx_condensed 0.29
## 4 race_ethnicity_clean 0.26
## 5 clean_sex 0.01
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_home_demographics_univariate_regression_adjR.csv"))
# Apply rounding function to p-value columns
results <- results %>% mutate(pval_text = paste0("p=", sapply(p.value, format_p_value))) # Format p-values
# Apply formatting function and create significance indicators
results <- results %>%
mutate(
pval_text = paste0("p=", sapply(p.value, format_p_value)),
sig = ifelse(p.value < 0.05, "Significant", "Non-Significant"), # Create significance group
alpha_level = ifelse(p.value < 0.05, 1, 0.3) # More transparency for non-sig points
)
# Determine x position for p-values (offset to the right of the max estimate)
x_max <- max(results$conf.high, na.rm = TRUE) # Get max confidence interval upper bound
results <- results %>% mutate(pval_x_pos = x_max + 1) # Place p-values slightly to the right of max x range
# Plot
title <- "Home: Log T25FW ~ Demographic/Disease Info"
p <- univariate_regression_plot(results, title, 1)
p
ggsave(file.path(output_dir, "t25fw_home_demographics_univariate_regression_estimates.png"),
bg = "white", width= 10, height=10)
### Univariate video metrics
uni1 <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_hv, data = home_df)
ggplot(data = home_df, aes(x = log_delta_pix_h_rel_median_pose_hv, y = t25fw_log)) + geom_point()
## Warning: Removed 4 rows containing missing values (`geom_point()`).
hist(resid(uni1))
uni2 <- lm(t25fw_log ~ stride_time_median_sec_pose_hv, data = home_df)
ggplot(data = home_df, aes(x = stride_time_median_sec_pose_hv, y = t25fw_log)) + geom_point()
## Warning: Removed 13 rows containing missing values (`geom_point()`).
hist(resid(uni2))
uni3 <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_hv, data = home_df)
ggplot(data = home_df, aes(x = mean_cadence_step_per_min_pose_hv, y = t25fw_log)) + geom_point()
## Warning: Removed 12 rows containing missing values (`geom_point()`).
hist(resid(uni3))
uni4 <- lm(t25fw_log ~ stride_width_median_cm_pose_hv, data = home_df)
ggplot(data = home_df, aes(x = stride_width_median_cm_pose_hv, y = t25fw_log)) + geom_point()
## Warning: Removed 12 rows containing missing values (`geom_point()`).
hist(resid(uni4))
uni_results <- bind_rows(
tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv",
AdjRsquared = summary(uni1)$adj.r.squared),
tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv",
AdjRsquared = summary(uni2)$adj.r.squared),
tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv",
AdjRsquared = summary(uni3)$adj.r.squared),
tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv",
AdjRsquared = summary(uni4)$adj.r.squared)) %>%
filter(term != "(Intercept)") # Remove intercept term
uni_results <- uni_results %>% mutate(Model = "Unadjusted")
adj_rsqu_df <- uni_results[c("Variable", "AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 4 x 2
## Variable AdjRsquared
## <chr> <dbl>
## 1 log_delta_pix_h_rel_median_pose_hv 0.15
## 2 stride_time_median_sec_pose_hv 0.44
## 3 mean_cadence_step_per_min_pose_hv 0.19
## 4 stride_width_median_cm_pose_hv 0.33
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_home_metrics_univariate_regression_adjR.csv"))
p <- univariate_regression_plot(uni_results, 'Home: Log T25FW ~ Metric', 1)
p
ggsave(file.path(output_dir, "t25fw_home_metrics_univariate_regression_estimates.png"),
bg = "white", width= 10, height=10)
## age ---------------------------------------------------
con1_age <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_hv + demoEHR_Age, data = home_df)
hist(resid(con1_age))
con2_age <- lm(t25fw_log ~ stride_time_median_sec_pose_hv + demoEHR_Age, data = home_df)
hist(resid(con2_age))
con3_age <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_hv + demoEHR_Age, data = home_df)
hist(resid(con3_age))
con4_age <- lm(t25fw_log ~ stride_width_median_cm_pose_hv + demoEHR_Age, data = home_df)
hist(resid(con4_age))
con_age_results <- bind_rows(
tidy(con1_age, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
tidy(con2_age, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
tidy(con3_age, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
tidy(con4_age, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("demoEHR_Age") # Define confounders to remove
con_age_results <- con_age_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_age_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_de~ -0.283 0.0783 -3.62 6.29e-4 -0.440 -0.126 log_del~ Adju~
## 2 stride~ 1.01 0.136 7.40 1.59e-9 0.734 1.28 stride_~ Adju~
## 3 mean_c~ -0.00893 0.00234 -3.82 3.71e-4 -0.0136 -0.00424 mean_ca~ Adju~
## 4 stride~ 0.0644 0.0154 4.19 1.13e-4 0.0335 0.0953 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_age <- bind_rows(uni_results, con_age_results)
results_age
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_pi~ -0.286 0.0853 -3.35 1.43e-3 -0.456 -0.115 log_del~
## 2 stride_time_~ 1.04 0.161 6.45 4.41e-8 0.716 1.36 stride_~
## 3 mean_cadence~ -0.00940 0.00260 -3.62 6.69e-4 -0.0146 -0.00419 mean_ca~
## 4 stride_width~ 0.0763 0.0148 5.14 4.41e-6 0.0465 0.106 stride_~
## 5 log_delta_pi~ -0.283 0.0783 -3.62 6.29e-4 -0.440 -0.126 log_del~
## 6 stride_time_~ 1.01 0.136 7.40 1.59e-9 0.734 1.28 stride_~
## 7 mean_cadence~ -0.00893 0.00234 -3.82 3.71e-4 -0.0136 -0.00424 mean_ca~
## 8 stride_width~ 0.0644 0.0154 4.19 1.13e-4 0.0335 0.0953 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_age, 'Home: Log T25FW ~ Metric + demoEHR_Age', 1)
p
ggsave(file.path(output_dir, "t25fw_home_metric_regression_adj_age.png"),
bg = "white", width= 10, height=10)
## disease duration -----------------------------------------------------
con1_dur <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_hv + demoEHR_DiseaseDuration, data = home_df)
hist(resid(con1_dur))
con2_dur <- lm(t25fw_log ~ stride_time_median_sec_pose_hv + demoEHR_DiseaseDuration, data = home_df)
hist(resid(con2_dur))
con3_dur <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_hv + demoEHR_DiseaseDuration, data = home_df)
hist(resid(con3_dur))
con4_dur <- lm(t25fw_log ~ stride_width_median_cm_pose_hv + demoEHR_DiseaseDuration, data = home_df)
hist(resid(con4_dur))
con_dur_results <- bind_rows(
tidy(con1_dur, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
tidy(con2_dur, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
tidy(con3_dur, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
tidy(con4_dur, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("demoEHR_DiseaseDuration") # Define confounders to remove
con_dur_results
## # A tibble: 8 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_pi~ -2.86e-1 0.0845 -3.38 1.29e-3 -0.455 -0.117 log_del~
## 2 demoEHR_Dise~ 8.36e-3 0.00560 1.49 1.41e-1 -0.00285 0.0196 log_del~
## 3 stride_time_~ 1.13e+0 0.155 7.30 2.32e-9 0.819 1.44 stride_~
## 4 demoEHR_Dise~ 1.29e-2 0.00466 2.77 7.83e-3 0.00356 0.0223 stride_~
## 5 mean_cadence~ -9.91e-3 0.00258 -3.85 3.39e-4 -0.0151 -0.00474 mean_ca~
## 6 demoEHR_Dise~ 9.21e-3 0.00575 1.60 1.16e-1 -0.00235 0.0208 mean_ca~
## 7 stride_width~ 7.61e-2 0.0154 4.94 9.13e-6 0.0452 0.107 stride_~
## 8 demoEHR_Dise~ 2.53e-4 0.00548 0.0463 9.63e-1 -0.0107 0.0113 stride_~
con_dur_results <- con_dur_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_dur_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_de~ -0.286 0.0845 -3.38 1.29e-3 -0.455 -0.117 log_del~ Adju~
## 2 stride~ 1.13 0.155 7.30 2.32e-9 0.819 1.44 stride_~ Adju~
## 3 mean_c~ -0.00991 0.00258 -3.85 3.39e-4 -0.0151 -0.00474 mean_ca~ Adju~
## 4 stride~ 0.0761 0.0154 4.94 9.13e-6 0.0452 0.107 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_dur <- bind_rows(uni_results, con_dur_results)
results_dur
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_pi~ -0.286 0.0853 -3.35 1.43e-3 -0.456 -0.115 log_del~
## 2 stride_time_~ 1.04 0.161 6.45 4.41e-8 0.716 1.36 stride_~
## 3 mean_cadence~ -0.00940 0.00260 -3.62 6.69e-4 -0.0146 -0.00419 mean_ca~
## 4 stride_width~ 0.0763 0.0148 5.14 4.41e-6 0.0465 0.106 stride_~
## 5 log_delta_pi~ -0.286 0.0845 -3.38 1.29e-3 -0.455 -0.117 log_del~
## 6 stride_time_~ 1.13 0.155 7.30 2.32e-9 0.819 1.44 stride_~
## 7 mean_cadence~ -0.00991 0.00258 -3.85 3.39e-4 -0.0151 -0.00474 mean_ca~
## 8 stride_width~ 0.0761 0.0154 4.94 9.13e-6 0.0452 0.107 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dur, 'Home: Log T25FW ~ Metric + demoEHR_DiseaseDuration', 1)
p
ggsave(file.path(output_dir, "t25fw_home_metric_regression_adj_duration.png"),
bg = "white", width= 10, height=10)
## MS DX ---------------------------------------------------------------
con1_dx <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_hv + ms_dx_condensed, data = home_df)
hist(resid(con1_dx))
con2_dx <- lm(t25fw_log ~ stride_time_median_sec_pose_hv + ms_dx_condensed, data = home_df)
hist(resid(con2_dx))
con3_dx <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_hv + ms_dx_condensed, data = home_df)
hist(resid(con3_dx))
con4_dx <- lm(t25fw_log ~ stride_width_median_cm_pose_hv + ms_dx_condensed, data = home_df)
hist(resid(con4_dx))
con_dx_results <- bind_rows(
tidy(con1_dx, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
tidy(con2_dx, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
tidy(con3_dx, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
tidy(con4_dx, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("ms_dx_condensed") # Define confounders to remove
con_dx_results
## # A tibble: 12 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ -0.180 0.0747 -2.41 1.92e-2 -0.330 -0.0305 log_del~
## 2 ms_dx_conde~ 0.597 0.112 5.35 1.65e-6 0.373 0.820 log_del~
## 3 ms_dx_conde~ 0.0583 0.156 0.374 7.10e-1 -0.254 0.371 log_del~
## 4 stride_time~ 0.406 0.206 1.97 5.43e-2 -0.00784 0.820 stride_~
## 5 ms_dx_conde~ 0.588 0.140 4.19 1.18e-4 0.306 0.870 stride_~
## 6 ms_dx_conde~ -0.00402 0.130 -0.0309 9.75e-1 -0.266 0.258 stride_~
## 7 mean_cadenc~ -0.00269 0.00217 -1.24 2.20e-1 -0.00704 0.00166 mean_ca~
## 8 ms_dx_conde~ 0.732 0.110 6.64 2.42e-8 0.511 0.954 mean_ca~
## 9 ms_dx_conde~ 0.0180 0.133 0.136 8.92e-1 -0.248 0.284 mean_ca~
## 10 stride_widt~ 0.0392 0.0135 2.91 5.48e-3 0.0121 0.0664 stride_~
## 11 ms_dx_conde~ 0.633 0.107 5.89 3.47e-7 0.417 0.849 stride_~
## 12 ms_dx_conde~ -0.0779 0.129 -0.601 5.50e-1 -0.338 0.182 stride_~
con_dx_results <- con_dx_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_dx_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_de~ -0.180 0.0747 -2.41 0.0192 -0.330 -0.0305 log_del~ Adju~
## 2 stride~ 0.406 0.206 1.97 0.0543 -0.00784 0.820 stride_~ Adju~
## 3 mean_c~ -0.00269 0.00217 -1.24 0.220 -0.00704 0.00166 mean_ca~ Adju~
## 4 stride~ 0.0392 0.0135 2.91 0.00548 0.0121 0.0664 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_dx <- bind_rows(uni_results, con_dx_results)
results_dx
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_pi~ -0.286 0.0853 -3.35 1.43e-3 -0.456 -0.115 log_del~
## 2 stride_time_~ 1.04 0.161 6.45 4.41e-8 0.716 1.36 stride_~
## 3 mean_cadence~ -0.00940 0.00260 -3.62 6.69e-4 -0.0146 -0.00419 mean_ca~
## 4 stride_width~ 0.0763 0.0148 5.14 4.41e-6 0.0465 0.106 stride_~
## 5 log_delta_pi~ -0.180 0.0747 -2.41 1.92e-2 -0.330 -0.0305 log_del~
## 6 stride_time_~ 0.406 0.206 1.97 5.43e-2 -0.00784 0.820 stride_~
## 7 mean_cadence~ -0.00269 0.00217 -1.24 2.20e-1 -0.00704 0.00166 mean_ca~
## 8 stride_width~ 0.0392 0.0135 2.91 5.48e-3 0.0121 0.0664 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dx, 'Home: Log T25FW ~ Metric + MS DX', 1)
p
ggsave(file.path(output_dir, "t25fw_home_metric_regression_adj_MSDX.png"),
bg = "white", width= 10, height=10)
## race_ethnicity -----------------------------------------------------
con1_re <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_hv + race_ethnicity_clean, data = home_df)
hist(resid(con1_re))
con2_re <- lm(t25fw_log ~ stride_time_median_sec_pose_hv + race_ethnicity_clean, data = home_df)
hist(resid(con2_re))
con3_re <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_hv + race_ethnicity_clean, data = home_df)
hist(resid(con3_re))
con4_re <- lm(t25fw_log ~ stride_width_median_cm_pose_hv + race_ethnicity_clean, data = home_df)
hist(resid(con4_re))
con_re_results <- bind_rows(
tidy(con1_re, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
tidy(con2_re, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
tidy(con3_re, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
tidy(con4_re, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("race_ethnicity_clean") # Define confounders to remove
con_re_results
## # A tibble: 17 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ -0.236 0.0835 -2.83 6.49e-3 -0.404 -0.0690 log_del~
## 2 race_ethnic~ -0.334 0.198 -1.68 9.79e-2 -0.732 0.0635 log_del~
## 3 race_ethnic~ 0.850 0.342 2.48 1.62e-2 0.164 1.54 log_del~
## 4 race_ethnic~ 0.206 0.240 0.860 3.93e-1 -0.274 0.687 log_del~
## 5 race_ethnic~ -0.225 0.123 -1.83 7.22e-2 -0.470 0.0210 log_del~
## 6 stride_time~ 0.987 0.152 6.51 4.54e-8 0.682 1.29 stride_~
## 7 race_ethnic~ -0.400 0.191 -2.10 4.13e-2 -0.784 -0.0165 stride_~
## 8 race_ethnic~ 0.220 0.191 1.15 2.56e-1 -0.165 0.604 stride_~
## 9 race_ethnic~ -0.235 0.0980 -2.40 2.07e-2 -0.432 -0.0376 stride_~
## 10 mean_cadenc~ -0.00928 0.00241 -3.85 3.53e-4 -0.0141 -0.00443 mean_ca~
## 11 race_ethnic~ -0.460 0.230 -2.00 5.13e-2 -0.924 0.00279 mean_ca~
## 12 race_ethnic~ 0.118 0.230 0.514 6.09e-1 -0.345 0.581 mean_ca~
## 13 race_ethnic~ -0.316 0.117 -2.69 9.69e-3 -0.552 -0.0802 mean_ca~
## 14 stride_widt~ 0.0684 0.0149 4.59 3.19e-5 0.0385 0.0984 stride_~
## 15 race_ethnic~ -0.393 0.221 -1.78 8.10e-2 -0.837 0.0504 stride_~
## 16 race_ethnic~ 0.0253 0.220 0.115 9.09e-1 -0.418 0.468 stride_~
## 17 race_ethnic~ -0.204 0.114 -1.79 8.03e-2 -0.434 0.0256 stride_~
con_re_results <- con_re_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_re_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_de~ -0.236 0.0835 -2.83 6.49e-3 -0.404 -0.0690 log_del~ Adju~
## 2 stride~ 0.987 0.152 6.51 4.54e-8 0.682 1.29 stride_~ Adju~
## 3 mean_c~ -0.00928 0.00241 -3.85 3.53e-4 -0.0141 -0.00443 mean_ca~ Adju~
## 4 stride~ 0.0684 0.0149 4.59 3.19e-5 0.0385 0.0984 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_re <- bind_rows(uni_results, con_re_results)
results_re
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_pi~ -0.286 0.0853 -3.35 1.43e-3 -0.456 -0.115 log_del~
## 2 stride_time_~ 1.04 0.161 6.45 4.41e-8 0.716 1.36 stride_~
## 3 mean_cadence~ -0.00940 0.00260 -3.62 6.69e-4 -0.0146 -0.00419 mean_ca~
## 4 stride_width~ 0.0763 0.0148 5.14 4.41e-6 0.0465 0.106 stride_~
## 5 log_delta_pi~ -0.236 0.0835 -2.83 6.49e-3 -0.404 -0.0690 log_del~
## 6 stride_time_~ 0.987 0.152 6.51 4.54e-8 0.682 1.29 stride_~
## 7 mean_cadence~ -0.00928 0.00241 -3.85 3.53e-4 -0.0141 -0.00443 mean_ca~
## 8 stride_width~ 0.0684 0.0149 4.59 3.19e-5 0.0385 0.0984 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_re, 'Home: Log T25FW ~ Metric + race_ethnicity_clean', 1)
p
ggsave(file.path(output_dir, "t25fw_home_metric_regression_adj_RaceEthnicity.png"),
bg = "white", width= 10, height=10)
## sex -------------------------------------------------------------------
con1_sex <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_hv + clean_sex, data = home_df)
hist(resid(con1_sex))
con2_sex <- lm(t25fw_log ~ stride_time_median_sec_pose_hv + clean_sex, data = home_df)
hist(resid(con2_sex))
con3_sex <- lm(t25fw_log ~ mean_cadence_step_per_min_pose_hv + clean_sex, data = home_df)
hist(resid(con3_sex))
con4_sex <- lm(t25fw_log ~ stride_width_median_cm_pose_hv + clean_sex, data = home_df)
hist(resid(con4_sex))
con_sex_results <- bind_rows(
tidy(con1_sex, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
tidy(con2_sex, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
tidy(con3_sex, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
tidy(con4_sex, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("clean_sex") # Define confounders to remove
con_sex_results
## # A tibble: 9 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_pi~ -0.268 0.0894 -3.00 3.96e-3 -0.447 -0.0895 log_del~
## 2 clean_sexMale -0.0901 0.125 -0.722 4.73e-1 -0.340 0.160 log_del~
## 3 clean_sexNon~ -0.0753 0.261 -0.288 7.74e-1 -0.598 0.448 log_del~
## 4 stride_time_~ 1.03 0.162 6.34 6.87e-8 0.703 1.36 stride_~
## 5 clean_sexMale -0.0901 0.105 -0.862 3.93e-1 -0.300 0.120 stride_~
## 6 mean_cadence~ -0.00924 0.00259 -3.57 8.02e-4 -0.0144 -0.00404 mean_ca~
## 7 clean_sexMale -0.143 0.120 -1.20 2.37e-1 -0.384 0.0970 mean_ca~
## 8 stride_width~ 0.0837 0.0143 5.87 3.51e-7 0.0550 0.112 stride_~
## 9 clean_sexMale -0.285 0.105 -2.71 9.16e-3 -0.496 -0.0738 stride_~
con_sex_results <- con_sex_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_sex_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_de~ -0.268 0.0894 -3.00 3.96e-3 -0.447 -0.0895 log_del~ Adju~
## 2 stride~ 1.03 0.162 6.34 6.87e-8 0.703 1.36 stride_~ Adju~
## 3 mean_c~ -0.00924 0.00259 -3.57 8.02e-4 -0.0144 -0.00404 mean_ca~ Adju~
## 4 stride~ 0.0837 0.0143 5.87 3.51e-7 0.0550 0.112 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_sex <- bind_rows(uni_results, con_sex_results)
results_sex
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_pi~ -0.286 0.0853 -3.35 1.43e-3 -0.456 -0.115 log_del~
## 2 stride_time_~ 1.04 0.161 6.45 4.41e-8 0.716 1.36 stride_~
## 3 mean_cadence~ -0.00940 0.00260 -3.62 6.69e-4 -0.0146 -0.00419 mean_ca~
## 4 stride_width~ 0.0763 0.0148 5.14 4.41e-6 0.0465 0.106 stride_~
## 5 log_delta_pi~ -0.268 0.0894 -3.00 3.96e-3 -0.447 -0.0895 log_del~
## 6 stride_time_~ 1.03 0.162 6.34 6.87e-8 0.703 1.36 stride_~
## 7 mean_cadence~ -0.00924 0.00259 -3.57 8.02e-4 -0.0144 -0.00404 mean_ca~
## 8 stride_width~ 0.0837 0.0143 5.87 3.51e-7 0.0550 0.112 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_sex, 'Home: Log T25FW ~ Metric + Sex', 1)
p
ggsave(file.path(output_dir, "t25fw_home_metric_regression_adj_sex.png"),
bg = "white", width= 10, height=10)
Metrics only - not including double support/stance measures, too many missing. May include after improving code
# HOme
# Unadjusted
all_unadj <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_hv +
stride_time_median_sec_pose_hv +
mean_cadence_step_per_min_pose_hv +
stride_width_median_cm_pose_hv,
data = home_df)
summary(all_unadj)
##
## Call:
## lm(formula = t25fw_log ~ log_delta_pix_h_rel_median_pose_hv +
## stride_time_median_sec_pose_hv + mean_cadence_step_per_min_pose_hv +
## stride_width_median_cm_pose_hv, data = home_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.39417 -0.17948 0.05974 0.17898 0.46313
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.516539 0.454806 1.136 0.26195
## log_delta_pix_h_rel_median_pose_hv -0.276928 0.118345 -2.340 0.02368 *
## stride_time_median_sec_pose_hv 0.466013 0.220894 2.110 0.04036 *
## mean_cadence_step_per_min_pose_hv -0.003278 0.002541 -1.290 0.20353
## stride_width_median_cm_pose_hv 0.045762 0.013099 3.494 0.00107 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2361 on 46 degrees of freedom
## (14 observations deleted due to missingness)
## Multiple R-squared: 0.6514, Adjusted R-squared: 0.6211
## F-statistic: 21.49 on 4 and 46 DF, p-value: 4.766e-10
hist(resid(all_unadj))
# Adjusted
all_adjusted <- lm(t25fw_log ~ log_delta_pix_h_rel_median_pose_hv +
stride_time_median_sec_pose_hv +
mean_cadence_step_per_min_pose_hv +
stride_width_median_cm_pose_hv +
demoEHR_Age +
demoEHR_DiseaseDuration +
ms_dx_condensed +
race_ethnicity_clean +
clean_sex,
data = home_df)
summary(all_adjusted)
##
## Call:
## lm(formula = t25fw_log ~ log_delta_pix_h_rel_median_pose_hv +
## stride_time_median_sec_pose_hv + mean_cadence_step_per_min_pose_hv +
## stride_width_median_cm_pose_hv + demoEHR_Age + demoEHR_DiseaseDuration +
## ms_dx_condensed + race_ethnicity_clean + clean_sex, data = home_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.39260 -0.10178 -0.00946 0.11447 0.34018
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.791302 0.431103 1.836
## log_delta_pix_h_rel_median_pose_hv -0.050404 0.110532 -0.456
## stride_time_median_sec_pose_hv 0.373112 0.212162 1.759
## mean_cadence_step_per_min_pose_hv -0.002326 0.002032 -1.144
## stride_width_median_cm_pose_hv 0.037122 0.012851 2.889
## demoEHR_Age 0.001887 0.002976 0.634
## demoEHR_DiseaseDuration 0.006831 0.004522 1.510
## ms_dx_condensedProgressive MS 0.298683 0.131944 2.264
## ms_dx_condensedMS, Subtype Not Specified -0.168827 0.122111 -1.383
## race_ethnicity_cleanAsian -0.323931 0.144318 -2.245
## race_ethnicity_cleanHispanic or Latino -0.026557 0.154113 -0.172
## race_ethnicity_cleanOther/Unknown/Declined -0.233602 0.087876 -2.658
## clean_sexMale -0.237104 0.093736 -2.529
## Pr(>|t|)
## (Intercept) 0.07426 .
## log_delta_pix_h_rel_median_pose_hv 0.65098
## stride_time_median_sec_pose_hv 0.08669 .
## mean_cadence_step_per_min_pose_hv 0.25964
## stride_width_median_cm_pose_hv 0.00636 **
## demoEHR_Age 0.52982
## demoEHR_DiseaseDuration 0.13922
## ms_dx_condensedProgressive MS 0.02938 *
## ms_dx_condensedMS, Subtype Not Specified 0.17487
## race_ethnicity_cleanAsian 0.03070 *
## race_ethnicity_cleanHispanic or Latino 0.86410
## race_ethnicity_cleanOther/Unknown/Declined 0.01143 *
## clean_sexMale 0.01569 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1829 on 38 degrees of freedom
## (14 observations deleted due to missingness)
## Multiple R-squared: 0.8271, Adjusted R-squared: 0.7726
## F-statistic: 15.15 on 12 and 38 DF, p-value: 5.692e-11
hist(resid(all_adjusted))
# format for plotting
# Unadjusted
unadj_all_results <- tidy(all_unadj, conf.int = TRUE) %>%
mutate(Model = "Unadjusted",
AdjRsquared = summary(all_unadj)$adj.r.squared)
# Adjusted
adj_all_results <- tidy(all_adjusted, conf.int = TRUE) %>%
mutate(Model = "Adjusted",
AdjRsquared = summary(all_adjusted)$adj.r.squared)
# combine and plot
multivar_results <- bind_rows(unadj_all_results, adj_all_results) %>%
filter(term != "(Intercept)")
adj_rsqu_df <- multivar_results[c("Model", "AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 2 x 2
## Model AdjRsquared
## <chr> <dbl>
## 1 Unadjusted 0.62
## 2 Adjusted 0.77
write.csv(adj_rsqu_df, file.path(output_dir, "t25fw_home_allMetrics_unadjusted_vs_adjusted_adjR.csv"))
p <- adj_vs_unadj_regression_plot(multivar_results, 'Home: Log T25FW Multivariate - Unadjusted vs Adjusted', 1)
p
ggsave(file.path(output_dir, "t25fw_home_allMetrics_unadjusted_vs_adjusted.png"),
bg = "white", width= 10, height=10)
ggplot(data = zeno_pws_df, aes(PWS_velocitycmsecmean)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
home_df$log_PWS_velocitycmsecmean <- log(home_df$PWS_velocitycmsecmean)
ggplot(data = home_df, aes(PWS_velocitycmsecmean)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# PWS ------------------------------------------
uni1 <- lm(PWS_velocitycmsecmean ~ demoEHR_Age, data = zeno_pws_df)
hist(resid(uni1))
uni2 <- lm(PWS_velocitycmsecmean ~ demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(uni2))
uni3 <- lm(PWS_velocitycmsecmean ~ ms_dx_condensed, data = zeno_pws_df)
hist(resid(uni3))
uni4 <- lm(PWS_velocitycmsecmean ~ race_ethnicity_clean, data = zeno_pws_df)
hist(resid(uni4))
uni5 <- lm(PWS_velocitycmsecmean ~ clean_sex, data = zeno_pws_df)
hist(resid(uni4))
## Extract regression results from each model
results <- bind_rows(
tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "demoEHR_Age",
AdjRsquared = summary(uni1)$adj.r.squared),
tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "demoEHR_DiseaseDuration",
AdjRsquared = summary(uni2)$adj.r.squared),
tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "ms_dx_condensed",
AdjRsquared = summary(uni3)$adj.r.squared),
tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "race_ethnicity_clean",
AdjRsquared = summary(uni4)$adj.r.squared),
tidy(uni5, conf.int = TRUE) %>% mutate(Variable = "clean_sex",
AdjRsquared = summary(uni5)$adj.r.squared)) %>%
filter(term != "(Intercept)") # Remove intercept term
adj_rsqu_df <- results[c("Variable","AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 5 x 2
## Variable AdjRsquared
## <chr> <dbl>
## 1 demoEHR_Age 0.05
## 2 demoEHR_DiseaseDuration 0.01
## 3 ms_dx_condensed 0.2
## 4 race_ethnicity_clean 0.03
## 5 clean_sex -0.01
write.csv(adj_rsqu_df, file.path(output_dir, "PWSvel_pws_demographics_univariate_regression_adjR.csv"))
# Plot
title <- "PWS: PWS_velocitycmsecmean ~ Demographic/Disease Info"
p <- univariate_regression_plot(results, title, 5)
p
ggsave(file.path(output_dir, "PWSvel_pws_demographics_univariate_regression_estimates.png"),
bg = "white", width= 10, height=10)
### Univariate video metrics
uni1 <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv, data = zeno_pws_df)
ggplot(data = zeno_pws_df, aes(x = log_delta_pix_h_rel_median_pose_zv, y = PWS_velocitycmsecmean)) + geom_point()
## Warning: Removed 16 rows containing missing values (`geom_point()`).
summary(uni1)
##
## Call:
## lm(formula = PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv,
## data = zeno_pws_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -61.352 -20.446 1.464 15.787 83.677
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 145.625 5.307 27.439 < 2e-16 ***
## log_delta_pix_h_rel_median_pose_zv 29.832 3.506 8.509 3.62e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.3 on 206 degrees of freedom
## (16 observations deleted due to missingness)
## Multiple R-squared: 0.2601, Adjusted R-squared: 0.2565
## F-statistic: 72.4 on 1 and 206 DF, p-value: 3.621e-15
hist(resid(uni1))
uni2 <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_zv, data = zeno_pws_df)
ggplot(data = zeno_pws_df, aes(x = stride_time_median_sec_pose_zv, y = PWS_velocitycmsecmean)) + geom_point()
## Warning: Removed 48 rows containing missing values (`geom_point()`).
hist(resid(uni2))
uni3 <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv, data = zeno_pws_df)
ggplot(data = zeno_pws_df, aes(x = mean_cadence_step_per_min_pose_zv, y = PWS_velocitycmsecmean)) + geom_point()
## Warning: Removed 40 rows containing missing values (`geom_point()`).
hist(resid(uni3))
uni4 <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_zv, data = zeno_pws_df)
ggplot(data = zeno_pws_df, aes(x = stride_width_median_cm_pose_zv, y = PWS_velocitycmsecmean)) + geom_point()
## Warning: Removed 40 rows containing missing values (`geom_point()`).
hist(resid(uni4))
uni_results <- bind_rows(
tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv",
AdjRsquared = summary(uni1)$adj.r.squared),
tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv",
AdjRsquared = summary(uni2)$adj.r.squared),
tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv",
AdjRsquared = summary(uni3)$adj.r.squared),
tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv",
AdjRsquared = summary(uni4)$adj.r.squared)) %>%
filter(term != "(Intercept)") # Remove intercept term
uni_results <- uni_results %>% mutate(Model = "Unadjusted")
adj_rsqu_df <- uni_results[c("Variable", "AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 4 x 2
## Variable AdjRsquared
## <chr> <dbl>
## 1 log_delta_pix_h_rel_median_pose_zv 0.26
## 2 stride_time_median_sec_pose_zv 0.23
## 3 mean_cadence_step_per_min_pose_zv 0.29
## 4 stride_width_median_cm_pose_zv 0.06
write.csv(adj_rsqu_df, file.path(output_dir, "PWSvel_pws_metrics_univariate_regression_adjR.csv"))
p <- univariate_regression_plot(uni_results, 'PWS: PWS_velocitycmsecmean ~ Metric', 35)
p
ggsave(file.path(output_dir, "PWSvel_pws_metrics_univariate_regression_estimates.png"),
bg = "white", width= 10, height=10)
## age ---------------------------------------------------
con1_age <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + demoEHR_Age, data = zeno_pws_df)
hist(resid(con1_age))
con2_age <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_zv + demoEHR_Age, data = zeno_pws_df)
hist(resid(con2_age))
con3_age <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + demoEHR_Age, data = zeno_pws_df)
hist(resid(con3_age))
con4_age <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_zv + demoEHR_Age, data = zeno_pws_df)
hist(resid(con4_age))
con_age_results <- bind_rows(
tidy(con1_age, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_age, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_age, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_age, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("demoEHR_Age") # Define confounders to remove
con_age_results <- con_age_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_age_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ 28.5 3.47 8.22 2.23e-14 21.7 35.4 log_del~ Adju~
## 2 strid~ -84.3 11.2 -7.50 3.12e-12 -106. -62.1 stride_~ Adju~
## 3 mean_~ 0.997 0.113 8.79 1.15e-15 0.773 1.22 mean_ca~ Adju~
## 4 strid~ -1.94 0.584 -3.32 1.09e- 3 -3.09 -0.787 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_age <- bind_rows(uni_results, con_age_results)
results_age
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ 29.8 3.51 8.51 3.62e-15 22.9 36.7 log_del~
## 2 stride_time~ -83.7 11.4 -7.36 7.04e-12 -106. -61.2 stride_~
## 3 mean_cadenc~ 0.997 0.116 8.60 3.56e-15 0.769 1.23 mean_ca~
## 4 stride_widt~ -2.06 0.589 -3.50 5.91e- 4 -3.22 -0.897 stride_~
## 5 log_delta_p~ 28.5 3.47 8.22 2.23e-14 21.7 35.4 log_del~
## 6 stride_time~ -84.3 11.2 -7.50 3.12e-12 -106. -62.1 stride_~
## 7 mean_cadenc~ 0.997 0.113 8.79 1.15e-15 0.773 1.22 mean_ca~
## 8 stride_widt~ -1.94 0.584 -3.32 1.09e- 3 -3.09 -0.787 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_age, 'PWS: PWS_velocitycmsecmean~ Metric + demoEHR_Age', 35)
p
ggsave(file.path(output_dir, "PWSvel_pws_metric_regression_adj_age.png"),
bg = "white", width= 10, height=10)
## disease duration -----------------------------------------------------
con1_dur <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(con1_dur))
con2_dur <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_zv + demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(con2_dur))
con3_dur <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(con3_dur))
con4_dur <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_zv + demoEHR_DiseaseDuration, data = zeno_pws_df)
hist(resid(con4_dur))
con_dur_results <- bind_rows(
tidy(con1_dur, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_dur, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_dur, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_dur, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("demoEHR_DiseaseDuration") # Define confounders to remove
con_dur_results
## # A tibble: 8 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ 29.6 3.51 8.43 5.94e-15 22.7 36.5 log_del~
## 2 demoEHR_Dis~ -0.297 0.225 -1.32 1.88e- 1 -0.740 0.146 log_del~
## 3 stride_time~ -85.0 11.3 -7.53 2.72e-12 -107. -62.7 stride_~
## 4 demoEHR_Dis~ -0.429 0.211 -2.04 4.32e- 2 -0.846 -0.0133 stride_~
## 5 mean_cadenc~ 1.02 0.115 8.90 5.78e-16 0.794 1.25 mean_ca~
## 6 demoEHR_Dis~ -0.515 0.207 -2.49 1.36e- 2 -0.923 -0.107 mean_ca~
## 7 stride_widt~ -2.01 0.589 -3.41 8.09e- 4 -3.17 -0.844 stride_~
## 8 demoEHR_Dis~ -0.310 0.240 -1.29 1.98e- 1 -0.784 0.163 stride_~
con_dur_results <- con_dur_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_dur_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ 29.6 3.51 8.43 5.94e-15 22.7 36.5 log_del~ Adju~
## 2 strid~ -85.0 11.3 -7.53 2.72e-12 -107. -62.7 stride_~ Adju~
## 3 mean_~ 1.02 0.115 8.90 5.78e-16 0.794 1.25 mean_ca~ Adju~
## 4 strid~ -2.01 0.589 -3.41 8.09e- 4 -3.17 -0.844 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_dur <- bind_rows(uni_results, con_dur_results)
results_dur
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ 29.8 3.51 8.51 3.62e-15 22.9 36.7 log_del~
## 2 stride_time~ -83.7 11.4 -7.36 7.04e-12 -106. -61.2 stride_~
## 3 mean_cadenc~ 0.997 0.116 8.60 3.56e-15 0.769 1.23 mean_ca~
## 4 stride_widt~ -2.06 0.589 -3.50 5.91e- 4 -3.22 -0.897 stride_~
## 5 log_delta_p~ 29.6 3.51 8.43 5.94e-15 22.7 36.5 log_del~
## 6 stride_time~ -85.0 11.3 -7.53 2.72e-12 -107. -62.7 stride_~
## 7 mean_cadenc~ 1.02 0.115 8.90 5.78e-16 0.794 1.25 mean_ca~
## 8 stride_widt~ -2.01 0.589 -3.41 8.09e- 4 -3.17 -0.844 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dur, 'PWS: PWS_velocitycmsecmean~ Metric + demoEHR_DiseaseDuration', 35)
p
ggsave(file.path(output_dir, "PWSvel_pws_metric_regression_adj_duration.png"),
bg = "white", width= 10, height=10)
## MS DX ---------------------------------------------------------------
con1_dx <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + ms_dx_condensed, data = zeno_pws_df)
hist(resid(con1_dx))
con2_dx <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_zv + ms_dx_condensed, data = zeno_pws_df)
hist(resid(con2_dx))
con3_dx <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + ms_dx_condensed, data = zeno_pws_df)
hist(resid(con3_dx))
con4_dx <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_zv + ms_dx_condensed, data = zeno_pws_df)
hist(resid(con4_dx))
con_dx_results <- bind_rows(
tidy(con1_dx, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_dx, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_dx, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_dx, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("ms_dx_condensed") # Define confounders to remove
con_dx_results
## # A tibble: 12 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_~ 21.6 3.65 5.92 1.31e- 8 14.4 28.8 log_del~
## 2 ms_dx_cond~ -26.2 4.96 -5.28 3.34e- 7 -35.9 -16.4 log_del~
## 3 ms_dx_cond~ 8.01 17.6 0.454 6.50e- 1 -26.8 42.8 log_del~
## 4 stride_tim~ -78.5 11.4 -6.88 1.06e-10 -101. -56.0 stride_~
## 5 ms_dx_cond~ -15.9 4.88 -3.26 1.36e- 3 -25.5 -6.25 stride_~
## 6 ms_dx_cond~ 27.6 13.1 2.10 3.75e- 2 1.62 53.5 stride_~
## 7 mean_caden~ 0.876 0.113 7.73 7.37e-13 0.653 1.10 mean_ca~
## 8 ms_dx_cond~ -20.4 4.56 -4.49 1.29e- 5 -29.4 -11.4 mean_ca~
## 9 ms_dx_cond~ 14.8 12.9 1.15 2.52e- 1 -10.6 40.3 mean_ca~
## 10 stride_wid~ -1.25 0.579 -2.16 3.18e- 2 -2.39 -0.110 stride_~
## 11 ms_dx_cond~ -25.7 5.26 -4.89 2.27e- 6 -36.1 -15.3 stride_~
## 12 ms_dx_cond~ 12.9 14.7 0.879 3.80e- 1 -16.1 41.9 stride_~
con_dx_results <- con_dx_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_dx_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ 21.6 3.65 5.92 1.31e- 8 14.4 28.8 log_del~ Adju~
## 2 strid~ -78.5 11.4 -6.88 1.06e-10 -101. -56.0 stride_~ Adju~
## 3 mean_~ 0.876 0.113 7.73 7.37e-13 0.653 1.10 mean_ca~ Adju~
## 4 strid~ -1.25 0.579 -2.16 3.18e- 2 -2.39 -0.110 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_dx <- bind_rows(uni_results, con_dx_results)
results_dx
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ 29.8 3.51 8.51 3.62e-15 22.9 36.7 log_del~
## 2 stride_time~ -83.7 11.4 -7.36 7.04e-12 -106. -61.2 stride_~
## 3 mean_cadenc~ 0.997 0.116 8.60 3.56e-15 0.769 1.23 mean_ca~
## 4 stride_widt~ -2.06 0.589 -3.50 5.91e- 4 -3.22 -0.897 stride_~
## 5 log_delta_p~ 21.6 3.65 5.92 1.31e- 8 14.4 28.8 log_del~
## 6 stride_time~ -78.5 11.4 -6.88 1.06e-10 -101. -56.0 stride_~
## 7 mean_cadenc~ 0.876 0.113 7.73 7.37e-13 0.653 1.10 mean_ca~
## 8 stride_widt~ -1.25 0.579 -2.16 3.18e- 2 -2.39 -0.110 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dx, 'PWS: PWS_velocitycmsecmean~ Metric + MS DX',35)
p
ggsave(file.path(output_dir, "PWSvel_pws_metric_regression_adj_MSDX.png"),
bg = "white", width= 10, height=10)
## race_ethnicity -----------------------------------------------------
con1_re <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + race_ethnicity_clean, data = zeno_pws_df)
hist(resid(con1_re))
con2_re <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_zv + race_ethnicity_clean, data = zeno_pws_df)
hist(resid(con2_re))
con3_re <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + race_ethnicity_clean, data = zeno_pws_df)
hist(resid(con3_re))
con4_re <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_zv + race_ethnicity_clean, data = zeno_pws_df)
hist(resid(con4_re))
con_re_results <- bind_rows(
tidy(con1_re, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_re, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_re, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_re, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("race_ethnicity_clean") # Define confounders to remove
con_re_results
## # A tibble: 20 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_~ 30.2 3.42 8.83 5.00e-16 23.4 36.9 log_del~
## 2 race_ethni~ 13.4 6.96 1.92 5.57e- 2 -0.327 27.1 log_del~
## 3 race_ethni~ -22.4 7.17 -3.12 2.06e- 3 -36.5 -8.25 log_del~
## 4 race_ethni~ 0.140 6.25 0.0224 9.82e- 1 -12.2 12.5 log_del~
## 5 race_ethni~ 4.85 6.57 0.739 4.61e- 1 -8.10 17.8 log_del~
## 6 stride_tim~ -80.5 11.2 -7.19 1.92e-11 -103. -58.4 stride_~
## 7 race_ethni~ 2.22 6.83 0.324 7.46e- 1 -11.3 15.7 stride_~
## 8 race_ethni~ -21.3 6.86 -3.10 2.26e- 3 -34.8 -7.73 stride_~
## 9 race_ethni~ 5.90 5.70 1.04 3.02e- 1 -5.35 17.2 stride_~
## 10 race_ethni~ -0.897 7.82 -0.115 9.09e- 1 -16.3 14.5 stride_~
## 11 mean_caden~ 1.01 0.115 8.83 9.70e-16 0.788 1.24 mean_ca~
## 12 race_ethni~ 0.327 6.85 0.0477 9.62e- 1 -13.2 13.9 mean_ca~
## 13 race_ethni~ -23.9 6.85 -3.50 5.98e- 4 -37.4 -10.4 mean_ca~
## 14 race_ethni~ 0.543 5.57 0.0974 9.23e- 1 -10.5 11.5 mean_ca~
## 15 race_ethni~ 7.32 7.56 0.968 3.35e- 1 -7.61 22.2 mean_ca~
## 16 stride_wid~ -1.89 0.593 -3.19 1.70e- 3 -3.06 -0.719 stride_~
## 17 race_ethni~ 5.37 8.02 0.669 5.04e- 1 -10.5 21.2 stride_~
## 18 race_ethni~ -20.9 8.07 -2.59 1.05e- 2 -36.8 -4.94 stride_~
## 19 race_ethni~ 0.876 6.51 0.135 8.93e- 1 -12.0 13.7 stride_~
## 20 race_ethni~ -5.80 8.70 -0.667 5.06e- 1 -23.0 11.4 stride_~
con_re_results <- con_re_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_re_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ 30.2 3.42 8.83 5.00e-16 23.4 36.9 log_del~ Adju~
## 2 strid~ -80.5 11.2 -7.19 1.92e-11 -103. -58.4 stride_~ Adju~
## 3 mean_~ 1.01 0.115 8.83 9.70e-16 0.788 1.24 mean_ca~ Adju~
## 4 strid~ -1.89 0.593 -3.19 1.70e- 3 -3.06 -0.719 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_re <- bind_rows(uni_results, con_re_results)
results_re
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ 29.8 3.51 8.51 3.62e-15 22.9 36.7 log_del~
## 2 stride_time~ -83.7 11.4 -7.36 7.04e-12 -106. -61.2 stride_~
## 3 mean_cadenc~ 0.997 0.116 8.60 3.56e-15 0.769 1.23 mean_ca~
## 4 stride_widt~ -2.06 0.589 -3.50 5.91e- 4 -3.22 -0.897 stride_~
## 5 log_delta_p~ 30.2 3.42 8.83 5.00e-16 23.4 36.9 log_del~
## 6 stride_time~ -80.5 11.2 -7.19 1.92e-11 -103. -58.4 stride_~
## 7 mean_cadenc~ 1.01 0.115 8.83 9.70e-16 0.788 1.24 mean_ca~
## 8 stride_widt~ -1.89 0.593 -3.19 1.70e- 3 -3.06 -0.719 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_re, 'PWS: PWS_velocitycmsecmean~ Metric + race_ethnicity_clean', 35)
p
ggsave(file.path(output_dir, "PWSvel_pws_metric_regression_adj_RaceEthnicity.png"),
bg = "white", width= 10, height=10)
## sex -------------------------------------------------------------------
con1_sex <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + clean_sex, data = zeno_pws_df)
hist(resid(con1_sex))
con2_sex <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_zv + clean_sex, data = zeno_pws_df)
hist(resid(con2_sex))
con3_sex <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + clean_sex, data = zeno_pws_df)
hist(resid(con3_sex))
con4_sex <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_zv + clean_sex, data = zeno_pws_df)
hist(resid(con4_sex))
con_sex_results <- bind_rows(
tidy(con1_sex, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_sex, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_sex, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_sex, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("clean_sex") # Define confounders to remove
con_sex_results
## # A tibble: 12 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_~ 29.9 3.53 8.46 5.24e-15 22.9 36.9 log_del~
## 2 clean_sexM~ -1.25 4.36 -0.288 7.74e- 1 -9.85 7.34 log_del~
## 3 clean_sexN~ 0.306 26.5 0.0115 9.91e- 1 -52.0 52.6 log_del~
## 4 stride_tim~ -84.1 11.4 -7.36 7.12e-12 -107. -61.6 stride_~
## 5 clean_sexM~ 3.53 4.02 0.878 3.81e- 1 -4.40 11.5 stride_~
## 6 clean_sexN~ 8.58 23.3 0.369 7.13e- 1 -37.4 54.5 stride_~
## 7 mean_caden~ 1.01 0.117 8.68 2.36e-15 0.781 1.24 mean_ca~
## 8 clean_sexM~ 5.57 3.99 1.40 1.64e- 1 -2.30 13.4 mean_ca~
## 9 clean_sexN~ 4.92 23.4 0.210 8.34e- 1 -41.3 51.1 mean_ca~
## 10 stride_wid~ -2.10 0.592 -3.54 5.02e- 4 -3.27 -0.930 stride_~
## 11 clean_sexM~ 3.70 4.59 0.807 4.21e- 1 -5.35 12.8 stride_~
## 12 clean_sexN~ 14.5 26.9 0.536 5.92e- 1 -38.7 67.6 stride_~
con_sex_results <- con_sex_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_sex_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ 29.9 3.53 8.46 5.24e-15 22.9 36.9 log_del~ Adju~
## 2 strid~ -84.1 11.4 -7.36 7.12e-12 -107. -61.6 stride_~ Adju~
## 3 mean_~ 1.01 0.117 8.68 2.36e-15 0.781 1.24 mean_ca~ Adju~
## 4 strid~ -2.10 0.592 -3.54 5.02e- 4 -3.27 -0.930 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_sex <- bind_rows(uni_results, con_sex_results)
results_sex
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ 29.8 3.51 8.51 3.62e-15 22.9 36.7 log_del~
## 2 stride_time~ -83.7 11.4 -7.36 7.04e-12 -106. -61.2 stride_~
## 3 mean_cadenc~ 0.997 0.116 8.60 3.56e-15 0.769 1.23 mean_ca~
## 4 stride_widt~ -2.06 0.589 -3.50 5.91e- 4 -3.22 -0.897 stride_~
## 5 log_delta_p~ 29.9 3.53 8.46 5.24e-15 22.9 36.9 log_del~
## 6 stride_time~ -84.1 11.4 -7.36 7.12e-12 -107. -61.6 stride_~
## 7 mean_cadenc~ 1.01 0.117 8.68 2.36e-15 0.781 1.24 mean_ca~
## 8 stride_widt~ -2.10 0.592 -3.54 5.02e- 4 -3.27 -0.930 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_sex, 'PWS: PWS_velocitycmsecmean~ Metric + Sex', 35)
p
ggsave(file.path(output_dir, "PWSvel_pws_metric_regression_adj_sex.png"),
bg = "white", width= 10, height=10)
Metrics only - not including double support/stance measures, too many missing. May include after improving code
# PWS
# Unadjusted
all_unadj <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv +
stride_time_median_sec_pose_zv +
mean_cadence_step_per_min_pose_zv +
stride_width_median_cm_pose_zv,
data = zeno_pws_df)
summary(all_unadj)
##
## Call:
## lm(formula = PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv +
## stride_time_median_sec_pose_zv + mean_cadence_step_per_min_pose_zv +
## stride_width_median_cm_pose_zv, data = zeno_pws_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -96.874 -13.154 0.782 16.072 39.382
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 111.5979 35.5240 3.141 0.002009 **
## log_delta_pix_h_rel_median_pose_zv 5.4194 3.8686 1.401 0.163234
## stride_time_median_sec_pose_zv -44.1353 16.2467 -2.717 0.007336 **
## mean_cadence_step_per_min_pose_zv 0.6453 0.1876 3.440 0.000746 ***
## stride_width_median_cm_pose_zv -1.4279 0.5475 -2.608 0.009989 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.68 on 157 degrees of freedom
## (62 observations deleted due to missingness)
## Multiple R-squared: 0.3772, Adjusted R-squared: 0.3613
## F-statistic: 23.77 on 4 and 157 DF, p-value: 2.207e-15
hist(resid(all_unadj))
# Adjusted
all_adjusted <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv +
stride_time_median_sec_pose_zv +
mean_cadence_step_per_min_pose_zv +
stride_width_median_cm_pose_zv +
demoEHR_Age +
demoEHR_DiseaseDuration +
ms_dx_condensed +
race_ethnicity_clean +
clean_sex,
data = zeno_pws_df)
summary(all_adjusted)
##
## Call:
## lm(formula = PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv +
## stride_time_median_sec_pose_zv + mean_cadence_step_per_min_pose_zv +
## stride_width_median_cm_pose_zv + demoEHR_Age + demoEHR_DiseaseDuration +
## ms_dx_condensed + race_ethnicity_clean + clean_sex, data = zeno_pws_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -92.112 -13.131 2.296 12.986 40.541
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 96.5168 36.4630 2.647
## log_delta_pix_h_rel_median_pose_zv 3.4012 3.8527 0.883
## stride_time_median_sec_pose_zv -35.3353 15.9830 -2.211
## mean_cadence_step_per_min_pose_zv 0.7379 0.1854 3.979
## stride_width_median_cm_pose_zv -0.6654 0.5639 -1.180
## demoEHR_Age -0.2455 0.1722 -1.426
## demoEHR_DiseaseDuration -0.2610 0.2401 -1.087
## ms_dx_condensedProgressive MS -11.0117 5.5688 -1.977
## ms_dx_condensedMS, Subtype Not Specified 13.1157 15.0333 0.872
## race_ethnicity_cleanAsian -0.5339 7.1708 -0.074
## race_ethnicity_cleanBlack Or African American -21.6522 6.4552 -3.354
## race_ethnicity_cleanHispanic or Latino 1.3116 5.7340 0.229
## race_ethnicity_cleanOther/Unknown/Declined -0.8366 7.6437 -0.109
## clean_sexMale 4.1575 3.9663 1.048
## clean_sexNon-Binary -1.9911 20.8628 -0.095
## Pr(>|t|)
## (Intercept) 0.009007 **
## log_delta_pix_h_rel_median_pose_zv 0.378786
## stride_time_median_sec_pose_zv 0.028594 *
## mean_cadence_step_per_min_pose_zv 0.000108 ***
## stride_width_median_cm_pose_zv 0.239905
## demoEHR_Age 0.156094
## demoEHR_DiseaseDuration 0.278775
## ms_dx_condensedProgressive MS 0.049869 *
## ms_dx_condensedMS, Subtype Not Specified 0.384391
## race_ethnicity_cleanAsian 0.940754
## race_ethnicity_cleanBlack Or African American 0.001013 **
## race_ethnicity_cleanHispanic or Latino 0.819384
## race_ethnicity_cleanOther/Unknown/Declined 0.912991
## clean_sexMale 0.296267
## clean_sexNon-Binary 0.924099
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.55 on 147 degrees of freedom
## (62 observations deleted due to missingness)
## Multiple R-squared: 0.4761, Adjusted R-squared: 0.4262
## F-statistic: 9.54 on 14 and 147 DF, p-value: 8.586e-15
hist(resid(all_adjusted))
# format for plotting
# Unadjusted
unadj_all_results <- tidy(all_unadj, conf.int = TRUE) %>%
mutate(Model = "Unadjusted",
AdjRsquared = summary(all_unadj)$adj.r.squared)
# Adjusted
adj_all_results <- tidy(all_adjusted, conf.int = TRUE) %>%
mutate(Model = "Adjusted",
AdjRsquared = summary(all_adjusted)$adj.r.squared)
# combine and plot
multivar_results <- bind_rows(unadj_all_results, adj_all_results) %>%
filter(term != "(Intercept)") # Remove intercept term
adj_rsqu_df <- multivar_results[c("Model", "AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 2 x 2
## Model AdjRsquared
## <chr> <dbl>
## 1 Unadjusted 0.36
## 2 Adjusted 0.43
write.csv(adj_rsqu_df, file.path(output_dir, "PWSvel_pws_allMetrics_unadjusted_vs_adjusted_adjR.csv"))
p <- adj_vs_unadj_regression_plot(multivar_results, 'PWS: PWS_velocitycmsecmean Multivariate - Unadjusted vs Adjusted', 35)
p
ggsave(file.path(output_dir, "PWSvel_pws_allMetrics_unadjusted_vs_adjusted.png"),
bg = "white", width= 10, height=10)
uni1 <- lm(PWS_velocitycmsecmean ~ demoEHR_Age, data = home_df)
hist(resid(uni1))
uni2 <- lm(PWS_velocitycmsecmean ~ demoEHR_DiseaseDuration, data = home_df)
hist(resid(uni2))
uni3 <- lm(PWS_velocitycmsecmean ~ ms_dx_condensed, data = home_df)
hist(resid(uni3))
uni4 <- lm(PWS_velocitycmsecmean ~ race_ethnicity_clean, data = home_df)
hist(resid(uni4))
uni5 <- lm(PWS_velocitycmsecmean ~ clean_sex, data = home_df)
hist(resid(uni4))
## Extract regression results from each model
results <- bind_rows(
tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "demoEHR_Age",
AdjRsquared = summary(uni1)$adj.r.squared),
tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "demoEHR_DiseaseDuration",
AdjRsquared = summary(uni2)$adj.r.squared),
tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "ms_dx_condensed",
AdjRsquared = summary(uni3)$adj.r.squared),
tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "race_ethnicity_clean",
AdjRsquared = summary(uni4)$adj.r.squared),
tidy(uni5, conf.int = TRUE) %>% mutate(Variable = "clean_sex",
AdjRsquared = summary(uni5)$adj.r.squared)) %>%
filter(term != "(Intercept)") # Remove intercept term
adj_rsqu_df <- results[c("Variable","AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 5 x 2
## Variable AdjRsquared
## <chr> <dbl>
## 1 demoEHR_Age 0.03
## 2 demoEHR_DiseaseDuration 0.02
## 3 ms_dx_condensed 0.27
## 4 race_ethnicity_clean 0.16
## 5 clean_sex 0.06
write.csv(adj_rsqu_df, file.path(output_dir, "PWSvel_home_demographics_univariate_regression_adjR.csv"))
# Apply rounding function to p-value columns
results <- results %>% mutate(pval_text = paste0("p=", sapply(p.value, format_p_value))) # Format p-values
# Apply formatting function and create significance indicators
results <- results %>%
mutate(
pval_text = paste0("p=", sapply(p.value, format_p_value)),
sig = ifelse(p.value < 0.05, "Significant", "Non-Significant"), # Create significance group
alpha_level = ifelse(p.value < 0.05, 1, 0.3) # More transparency for non-sig points
)
# Determine x position for p-values (offset to the right of the max estimate)
x_max <- max(results$conf.high, na.rm = TRUE) # Get max confidence interval upper bound
results <- results %>% mutate(pval_x_pos = x_max + 1) # Place p-values slightly to the right of max x range
# Plot
title <- "Home: PWS_velocitycmsecmean ~ Demographic/MS Info"
p <- univariate_regression_plot(results, title, 35)
p
ggsave(file.path(output_dir, "PWSvel_home_demographics_univariate_regression_estimates.png"),
bg = "white", width= 10, height=10)
### Univariate video metrics
uni1 <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv, data = home_df)
ggplot(data = home_df, aes(x = log_delta_pix_h_rel_median_pose_hv, y = PWS_velocitycmsecmean)) + geom_point()
## Warning: Removed 4 rows containing missing values (`geom_point()`).
hist(resid(uni1))
uni2 <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_hv, data = home_df)
ggplot(data = home_df, aes(x = stride_time_median_sec_pose_hv, y = PWS_velocitycmsecmean)) + geom_point()
## Warning: Removed 13 rows containing missing values (`geom_point()`).
hist(resid(uni2))
uni3 <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_hv, data = home_df)
ggplot(data = home_df, aes(x = mean_cadence_step_per_min_pose_hv, y = PWS_velocitycmsecmean)) + geom_point()
## Warning: Removed 12 rows containing missing values (`geom_point()`).
hist(resid(uni3))
uni4 <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_hv, data = home_df)
ggplot(data = home_df, aes(x = stride_width_median_cm_pose_hv, y = PWS_velocitycmsecmean)) + geom_point()
## Warning: Removed 12 rows containing missing values (`geom_point()`).
hist(resid(uni4))
uni_results <- bind_rows(
tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv",
AdjRsquared = summary(uni1)$adj.r.squared),
tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv",
AdjRsquared = summary(uni2)$adj.r.squared),
tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv",
AdjRsquared = summary(uni3)$adj.r.squared),
tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv",
AdjRsquared = summary(uni4)$adj.r.squared)) %>%
filter(term != "(Intercept)") # Remove intercept term
uni_results <- uni_results %>% mutate(Model = "Unadjusted")
adj_rsqu_df <- uni_results[c("Variable", "AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 4 x 2
## Variable AdjRsquared
## <chr> <dbl>
## 1 log_delta_pix_h_rel_median_pose_hv 0.23
## 2 stride_time_median_sec_pose_hv 0.3
## 3 mean_cadence_step_per_min_pose_hv 0.04
## 4 stride_width_median_cm_pose_hv 0.18
write.csv(adj_rsqu_df, file.path(output_dir, "PWSvel_home_metrics_univariate_regression_adjR.csv"))
p <- univariate_regression_plot(uni_results, 'PWS_velocitycmsecmean ~ Metric', 35)
p
ggsave(file.path(output_dir, "PWSvel_home_metrics_univariate_regression_estimates.png"),
bg = "white", width= 10, height=10)
## age ---------------------------------------------------
con1_age <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv + demoEHR_Age, data = home_df)
hist(resid(con1_age))
con2_age <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_hv + demoEHR_Age, data = home_df)
hist(resid(con2_age))
con3_age <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_hv + demoEHR_Age, data = home_df)
hist(resid(con3_age))
con4_age <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_hv + demoEHR_Age, data = home_df)
hist(resid(con4_age))
con_age_results <- bind_rows(
tidy(con1_age, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
tidy(con2_age, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
tidy(con3_age, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
tidy(con4_age, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("demoEHR_Age") # Define confounders to remove
con_age_results <- con_age_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_age_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_de~ 25.2 5.73 4.40 4.71e-5 13.7 36.7 log_del~ Adju~
## 2 stride~ -63.7 13.2 -4.81 1.46e-5 -90.3 -37.1 stride_~ Adju~
## 3 mean_c~ 0.357 0.209 1.70 9.46e-2 -0.0638 0.777 mean_ca~ Adju~
## 4 stride~ -3.88 1.34 -2.90 5.53e-3 -6.57 -1.19 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_age <- bind_rows(uni_results, con_age_results)
results_age
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_pi~ 25.3 5.89 4.30 6.51e-5 13.5 37.1 log_del~
## 2 stride_time_~ -65.2 13.8 -4.74 1.84e-5 -92.9 -37.6 stride_~
## 3 mean_cadence~ 0.380 0.215 1.77 8.32e-2 -0.0517 0.812 mean_ca~
## 4 stride_width~ -4.37 1.25 -3.50 9.74e-4 -6.88 -1.87 stride_~
## 5 log_delta_pi~ 25.2 5.73 4.40 4.71e-5 13.7 36.7 log_del~
## 6 stride_time_~ -63.7 13.2 -4.81 1.46e-5 -90.3 -37.1 stride_~
## 7 mean_cadence~ 0.357 0.209 1.70 9.46e-2 -0.0638 0.777 mean_ca~
## 8 stride_width~ -3.88 1.34 -2.90 5.53e-3 -6.57 -1.19 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_age, 'Home: PWS_velocitycmsecmean ~ Metric + demoEHR_Age', 35)
p
ggsave(file.path(output_dir, "PWSvel_home_metric_regression_adj_age.png"),
bg = "white", width= 10, height=10)
## disease duration -----------------------------------------------------
con1_dur <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv + demoEHR_DiseaseDuration, data = home_df)
hist(resid(con1_dur))
con2_dur <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_hv + demoEHR_DiseaseDuration, data = home_df)
hist(resid(con2_dur))
con3_dur <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_hv + demoEHR_DiseaseDuration, data = home_df)
hist(resid(con3_dur))
con4_dur <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_hv + demoEHR_DiseaseDuration, data = home_df)
hist(resid(con4_dur))
con_dur_results <- bind_rows(
tidy(con1_dur, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
tidy(con2_dur, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
tidy(con3_dur, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
tidy(con4_dur, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("demoEHR_DiseaseDuration") # Define confounders to remove
con_dur_results
## # A tibble: 8 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_pi~ 25.3 5.86 4.32 6.18e-5 13.6 37.1 log_del~
## 2 demoEHR_Dise~ -0.483 0.389 -1.24 2.19e-1 -1.26 0.295 log_del~
## 3 stride_time_~ -69.7 13.9 -5.01 7.56e-6 -97.7 -41.7 stride_~
## 4 demoEHR_Dise~ -0.632 0.419 -1.51 1.37e-1 -1.47 0.209 stride_~
## 5 mean_cadence~ 0.400 0.218 1.84 7.18e-2 -0.0368 0.837 mean_ca~
## 6 demoEHR_Dise~ -0.368 0.486 -0.756 4.53e-1 -1.34 0.609 mean_ca~
## 7 stride_width~ -4.44 1.30 -3.43 1.23e-3 -7.05 -1.84 stride_~
## 8 demoEHR_Dise~ 0.106 0.461 0.230 8.19e-1 -0.820 1.03 stride_~
con_dur_results <- con_dur_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_dur_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_de~ 25.3 5.86 4.32 6.18e-5 13.6 37.1 log_del~ Adju~
## 2 stride~ -69.7 13.9 -5.01 7.56e-6 -97.7 -41.7 stride_~ Adju~
## 3 mean_c~ 0.400 0.218 1.84 7.18e-2 -0.0368 0.837 mean_ca~ Adju~
## 4 stride~ -4.44 1.30 -3.43 1.23e-3 -7.05 -1.84 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_dur <- bind_rows(uni_results, con_dur_results)
results_dur
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_pi~ 25.3 5.89 4.30 6.51e-5 13.5 37.1 log_del~
## 2 stride_time_~ -65.2 13.8 -4.74 1.84e-5 -92.9 -37.6 stride_~
## 3 mean_cadence~ 0.380 0.215 1.77 8.32e-2 -0.0517 0.812 mean_ca~
## 4 stride_width~ -4.37 1.25 -3.50 9.74e-4 -6.88 -1.87 stride_~
## 5 log_delta_pi~ 25.3 5.86 4.32 6.18e-5 13.6 37.1 log_del~
## 6 stride_time_~ -69.7 13.9 -5.01 7.56e-6 -97.7 -41.7 stride_~
## 7 mean_cadence~ 0.400 0.218 1.84 7.18e-2 -0.0368 0.837 mean_ca~
## 8 stride_width~ -4.44 1.30 -3.43 1.23e-3 -7.05 -1.84 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dur, 'Home: PWS_velocitycmsecmean ~ Metric + demoEHR_DiseaseDuration', 35)
p
ggsave(file.path(output_dir, "PWSvel_home_metric_regression_adj_duration.png"),
bg = "white", width= 10, height=10)
## MS DX ---------------------------------------------------------------
con1_dx <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv + ms_dx_condensed, data = home_df)
hist(resid(con1_dx))
con2_dx <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_hv + ms_dx_condensed, data = home_df)
hist(resid(con2_dx))
con3_dx <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_hv + ms_dx_condensed, data = home_df)
hist(resid(con3_dx))
con4_dx <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_hv + ms_dx_condensed, data = home_df)
hist(resid(con4_dx))
con_dx_results <- bind_rows(
tidy(con1_dx, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
tidy(con2_dx, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
tidy(con3_dx, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
tidy(con4_dx, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("ms_dx_condensed") # Define confounders to remove
con_dx_results
## # A tibble: 12 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ 18.0 5.34 3.37 1.36e-3 7.28 28.7 log_del~
## 2 ms_dx_conde~ -37.6 7.98 -4.72 1.58e-5 -53.6 -21.7 log_del~
## 3 ms_dx_conde~ 4.84 11.2 0.434 6.66e-1 -17.5 27.2 log_del~
## 4 stride_time~ -17.3 17.9 -0.968 3.38e-1 -53.3 18.7 stride_~
## 5 ms_dx_conde~ -44.3 12.2 -3.64 6.70e-4 -68.8 -19.8 stride_~
## 6 ms_dx_conde~ 11.3 11.3 1.00 3.21e-1 -11.4 34.1 stride_~
## 7 mean_cadenc~ -0.139 0.185 -0.752 4.55e-1 -0.512 0.233 mean_ca~
## 8 ms_dx_conde~ -56.9 9.43 -6.03 2.10e-7 -75.8 -37.9 mean_ca~
## 9 ms_dx_conde~ 8.83 11.3 0.779 4.39e-1 -13.9 31.6 mean_ca~
## 10 stride_widt~ -1.85 1.21 -1.53 1.32e-1 -4.28 0.576 stride_~
## 11 ms_dx_conde~ -45.8 9.62 -4.76 1.74e-5 -65.1 -26.5 stride_~
## 12 ms_dx_conde~ 14.6 11.6 1.26 2.12e-1 -8.65 37.9 stride_~
con_dx_results <- con_dx_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_dx_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_de~ 18.0 5.34 3.37 0.00136 7.28 28.7 log_del~ Adju~
## 2 stride~ -17.3 17.9 -0.968 0.338 -53.3 18.7 stride_~ Adju~
## 3 mean_c~ -0.139 0.185 -0.752 0.455 -0.512 0.233 mean_ca~ Adju~
## 4 stride~ -1.85 1.21 -1.53 0.132 -4.28 0.576 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_dx <- bind_rows(uni_results, con_dx_results)
results_dx
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_pi~ 25.3 5.89 4.30 6.51e-5 13.5 37.1 log_del~
## 2 stride_time_~ -65.2 13.8 -4.74 1.84e-5 -92.9 -37.6 stride_~
## 3 mean_cadence~ 0.380 0.215 1.77 8.32e-2 -0.0517 0.812 mean_ca~
## 4 stride_width~ -4.37 1.25 -3.50 9.74e-4 -6.88 -1.87 stride_~
## 5 log_delta_pi~ 18.0 5.34 3.37 1.36e-3 7.28 28.7 log_del~
## 6 stride_time_~ -17.3 17.9 -0.968 3.38e-1 -53.3 18.7 stride_~
## 7 mean_cadence~ -0.139 0.185 -0.752 4.55e-1 -0.512 0.233 mean_ca~
## 8 stride_width~ -1.85 1.21 -1.53 1.32e-1 -4.28 0.576 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dx, 'Home: PWS_velocitycmsecmean ~ Metric + MS DX', 35)
p
ggsave(file.path(output_dir, "PWSvel_home_metric_regression_adj_MSDX.png"),
bg = "white", width= 10, height=10)
## race_ethnicity -----------------------------------------------------
con1_re <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv + race_ethnicity_clean, data = home_df)
hist(resid(con1_re))
con2_re <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_hv + race_ethnicity_clean, data = home_df)
hist(resid(con2_re))
con3_re <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_hv + race_ethnicity_clean, data = home_df)
hist(resid(con3_re))
con4_re <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_hv + race_ethnicity_clean, data = home_df)
hist(resid(con4_re))
con_re_results <- bind_rows(
tidy(con1_re, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
tidy(con2_re, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
tidy(con3_re, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
tidy(con4_re, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("race_ethnicity_clean") # Define confounders to remove
con_re_results
## # A tibble: 17 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ 25.0 5.84 4.28 7.47e-5 13.3 36.7 log_del~
## 2 race_ethnic~ 33.5 13.9 2.41 1.93e-2 5.65 61.3 log_del~
## 3 race_ethnic~ -34.2 24.0 -1.43 1.59e-1 -82.3 13.8 log_del~
## 4 race_ethnic~ -26.0 16.8 -1.55 1.27e-1 -59.7 7.61 log_del~
## 5 race_ethnic~ 8.23 8.57 0.960 3.41e-1 -8.95 25.4 log_del~
## 6 stride_time~ -62.8 13.4 -4.68 2.46e-5 -89.8 -35.8 stride_~
## 7 race_ethnic~ 30.8 16.9 1.82 7.45e-2 -3.17 64.8 stride_~
## 8 race_ethnic~ -23.6 16.9 -1.40 1.70e-1 -57.6 10.4 stride_~
## 9 race_ethnic~ 12.0 8.68 1.38 1.75e-1 -5.49 29.4 stride_~
## 10 mean_cadenc~ 0.371 0.208 1.78 8.09e-2 -0.0473 0.789 mean_ca~
## 11 race_ethnic~ 35.2 19.9 1.77 8.29e-2 -4.76 75.1 mean_ca~
## 12 race_ethnic~ -17.1 19.8 -0.864 3.92e-1 -57.0 22.7 mean_ca~
## 13 race_ethnic~ 16.9 10.1 1.68 1.00e-1 -3.38 37.2 mean_ca~
## 14 stride_widt~ -3.84 1.28 -3.01 4.17e-3 -6.40 -1.27 stride_~
## 15 race_ethnic~ 30.9 18.9 1.64 1.08e-1 -7.06 68.9 stride_~
## 16 race_ethnic~ -12.0 18.9 -0.639 5.26e-1 -50.0 25.9 stride_~
## 17 race_ethnic~ 10.7 9.79 1.09 2.81e-1 -9.00 30.4 stride_~
con_re_results <- con_re_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_re_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_de~ 25.0 5.84 4.28 7.47e-5 13.3 36.7 log_del~ Adju~
## 2 stride~ -62.8 13.4 -4.68 2.46e-5 -89.8 -35.8 stride_~ Adju~
## 3 mean_c~ 0.371 0.208 1.78 8.09e-2 -0.0473 0.789 mean_ca~ Adju~
## 4 stride~ -3.84 1.28 -3.01 4.17e-3 -6.40 -1.27 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_re <- bind_rows(uni_results, con_re_results)
results_re
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_pi~ 25.3 5.89 4.30 6.51e-5 13.5 37.1 log_del~
## 2 stride_time_~ -65.2 13.8 -4.74 1.84e-5 -92.9 -37.6 stride_~
## 3 mean_cadence~ 0.380 0.215 1.77 8.32e-2 -0.0517 0.812 mean_ca~
## 4 stride_width~ -4.37 1.25 -3.50 9.74e-4 -6.88 -1.87 stride_~
## 5 log_delta_pi~ 25.0 5.84 4.28 7.47e-5 13.3 36.7 log_del~
## 6 stride_time_~ -62.8 13.4 -4.68 2.46e-5 -89.8 -35.8 stride_~
## 7 mean_cadence~ 0.371 0.208 1.78 8.09e-2 -0.0473 0.789 mean_ca~
## 8 stride_width~ -3.84 1.28 -3.01 4.17e-3 -6.40 -1.27 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_re, 'Home: PWS_velocitycmsecmean ~ Metric + race_ethnicity_clean', 35)
p
ggsave(file.path(output_dir, "PWSvel_home_metric_regression_adj_RaceEthnicity.png"),
bg = "white", width= 10, height=10)
## sex -------------------------------------------------------------------
con1_sex <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv + clean_sex, data = home_df)
hist(resid(con1_sex))
con2_sex <- lm(PWS_velocitycmsecmean ~ stride_time_median_sec_pose_hv + clean_sex, data = home_df)
hist(resid(con2_sex))
con3_sex <- lm(PWS_velocitycmsecmean ~ mean_cadence_step_per_min_pose_hv + clean_sex, data = home_df)
hist(resid(con3_sex))
con4_sex <- lm(PWS_velocitycmsecmean ~ stride_width_median_cm_pose_hv + clean_sex, data = home_df)
hist(resid(con4_sex))
con_sex_results <- bind_rows(
tidy(con1_sex, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_hv"),
tidy(con2_sex, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_hv"),
tidy(con3_sex, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_hv"),
tidy(con4_sex, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_hv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("clean_sex") # Define confounders to remove
con_sex_results
## # A tibble: 9 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_pi~ 23.1 6.07 3.80 3.53e-4 10.9 35.2 log_del~
## 2 clean_sexMale 11.5 8.47 1.36 1.80e-1 -5.46 28.5 log_del~
## 3 clean_sexNon~ 17.4 17.7 0.981 3.31e-1 -18.1 52.9 log_del~
## 4 stride_time_~ -63.6 13.6 -4.67 2.38e-5 -91.0 -36.2 stride_~
## 5 clean_sexMale 13.7 8.78 1.56 1.25e-1 -3.93 31.4 stride_~
## 6 mean_cadence~ 0.359 0.211 1.71 9.43e-2 -0.0639 0.782 mean_ca~
## 7 clean_sexMale 17.7 9.75 1.82 7.50e-2 -1.85 37.3 mean_ca~
## 8 stride_width~ -5.05 1.19 -4.25 9.34e-5 -7.43 -2.66 stride_~
## 9 clean_sexMale 25.7 8.74 2.94 4.89e-3 8.18 43.3 stride_~
con_sex_results <- con_sex_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_sex_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_de~ 23.1 6.07 3.80 3.53e-4 10.9 35.2 log_del~ Adju~
## 2 stride~ -63.6 13.6 -4.67 2.38e-5 -91.0 -36.2 stride_~ Adju~
## 3 mean_c~ 0.359 0.211 1.71 9.43e-2 -0.0639 0.782 mean_ca~ Adju~
## 4 stride~ -5.05 1.19 -4.25 9.34e-5 -7.43 -2.66 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_sex <- bind_rows(uni_results, con_sex_results)
results_sex
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_pi~ 25.3 5.89 4.30 6.51e-5 13.5 37.1 log_del~
## 2 stride_time_~ -65.2 13.8 -4.74 1.84e-5 -92.9 -37.6 stride_~
## 3 mean_cadence~ 0.380 0.215 1.77 8.32e-2 -0.0517 0.812 mean_ca~
## 4 stride_width~ -4.37 1.25 -3.50 9.74e-4 -6.88 -1.87 stride_~
## 5 log_delta_pi~ 23.1 6.07 3.80 3.53e-4 10.9 35.2 log_del~
## 6 stride_time_~ -63.6 13.6 -4.67 2.38e-5 -91.0 -36.2 stride_~
## 7 mean_cadence~ 0.359 0.211 1.71 9.43e-2 -0.0639 0.782 mean_ca~
## 8 stride_width~ -5.05 1.19 -4.25 9.34e-5 -7.43 -2.66 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_sex, 'Home: PWS_velocitycmsecmean ~ Metric + Sex', 35)
p
ggsave(file.path(output_dir, "PWSvel_home_metric_regression_adj_sex.png"),
bg = "white", width= 10, height=10)
Metrics only - not including double support/stance measures, too many missing. May include after improving code
# HOme
# Unadjusted
all_unadj <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv +
stride_time_median_sec_pose_hv +
mean_cadence_step_per_min_pose_hv +
stride_width_median_cm_pose_hv,
data = home_df)
summary(all_unadj)
##
## Call:
## lm(formula = PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv +
## stride_time_median_sec_pose_hv + mean_cadence_step_per_min_pose_hv +
## stride_width_median_cm_pose_hv, data = home_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.428 -15.413 -0.333 15.053 39.112
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 217.26491 40.39966 5.378 2.45e-06 ***
## log_delta_pix_h_rel_median_pose_hv 29.67787 10.51239 2.823 0.0070 **
## stride_time_median_sec_pose_hv -34.36527 19.62163 -1.751 0.0865 .
## mean_cadence_step_per_min_pose_hv -0.07187 0.22571 -0.318 0.7516
## stride_width_median_cm_pose_hv -2.18518 1.16357 -1.878 0.0667 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.97 on 46 degrees of freedom
## (14 observations deleted due to missingness)
## Multiple R-squared: 0.509, Adjusted R-squared: 0.4663
## F-statistic: 11.92 on 4 and 46 DF, p-value: 9.964e-07
hist(resid(all_unadj))
# Adjusted
all_adjusted <- lm(PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv +
stride_time_median_sec_pose_hv +
mean_cadence_step_per_min_pose_hv +
stride_width_median_cm_pose_hv +
demoEHR_Age +
demoEHR_DiseaseDuration +
ms_dx_condensed +
race_ethnicity_clean +
clean_sex,
data = home_df)
summary(all_adjusted)
##
## Call:
## lm(formula = PWS_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_hv +
## stride_time_median_sec_pose_hv + mean_cadence_step_per_min_pose_hv +
## stride_width_median_cm_pose_hv + demoEHR_Age + demoEHR_DiseaseDuration +
## ms_dx_condensed + race_ethnicity_clean + clean_sex, data = home_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -37.949 -12.878 1.424 12.322 24.056
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 159.322107 40.797510 3.905
## log_delta_pix_h_rel_median_pose_hv 11.864874 10.460199 1.134
## stride_time_median_sec_pose_hv -5.506569 20.078003 -0.274
## mean_cadence_step_per_min_pose_hv -0.184827 0.192318 -0.961
## stride_width_median_cm_pose_hv -2.182158 1.216133 -1.794
## demoEHR_Age 0.278120 0.281678 0.987
## demoEHR_DiseaseDuration -0.005673 0.427987 -0.013
## ms_dx_condensedProgressive MS -42.520371 12.486508 -3.405
## ms_dx_condensedMS, Subtype Not Specified 0.677488 11.555976 0.059
## race_ethnicity_cleanAsian 35.238467 13.657605 2.580
## race_ethnicity_cleanHispanic or Latino -23.922461 14.584544 -1.640
## race_ethnicity_cleanOther/Unknown/Declined 11.062739 8.316162 1.330
## clean_sexMale 25.611581 8.870735 2.887
## Pr(>|t|)
## (Intercept) 0.000374 ***
## log_delta_pix_h_rel_median_pose_hv 0.263779
## stride_time_median_sec_pose_hv 0.785371
## mean_cadence_step_per_min_pose_hv 0.342603
## stride_width_median_cm_pose_hv 0.080718 .
## demoEHR_Age 0.329707
## demoEHR_DiseaseDuration 0.989493
## ms_dx_condensedProgressive MS 0.001573 **
## ms_dx_condensedMS, Subtype Not Specified 0.953557
## race_ethnicity_cleanAsian 0.013866 *
## race_ethnicity_cleanHispanic or Latino 0.109204
## race_ethnicity_cleanOther/Unknown/Declined 0.191358
## clean_sexMale 0.006380 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.31 on 38 degrees of freedom
## (14 observations deleted due to missingness)
## Multiple R-squared: 0.7237, Adjusted R-squared: 0.6365
## F-statistic: 8.295 on 12 and 38 DF, p-value: 2.263e-07
hist(resid(all_adjusted))
# format for plotting
# Unadjusted
unadj_all_results <- tidy(all_unadj, conf.int = TRUE) %>%
mutate(Model = "Unadjusted",
AdjRsquared = summary(all_unadj)$adj.r.squared)
unadj_all_results
## # A tibble: 5 x 9
## term estimate std.error statistic p.value conf.low conf.high Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 (Intercept) 217. 40.4 5.38 2.45e-6 136. 299. Unad~
## 2 log_delta_pix_h~ 29.7 10.5 2.82 7.00e-3 8.52 50.8 Unad~
## 3 stride_time_med~ -34.4 19.6 -1.75 8.65e-2 -73.9 5.13 Unad~
## 4 mean_cadence_st~ -0.0719 0.226 -0.318 7.52e-1 -0.526 0.382 Unad~
## 5 stride_width_me~ -2.19 1.16 -1.88 6.67e-2 -4.53 0.157 Unad~
## # i 1 more variable: AdjRsquared <dbl>
# Adjusted
adj_all_results <- tidy(all_adjusted, conf.int = TRUE) %>%
mutate(Model = "Adjusted",
AdjRsquared = summary(all_adjusted)$adj.r.squared)
adj_all_results
## # A tibble: 13 x 9
## term estimate std.error statistic p.value conf.low conf.high Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 (Intercept) 1.59e+2 40.8 3.91 3.74e-4 76.7 242. Adju~
## 2 log_delta_pix_~ 1.19e+1 10.5 1.13 2.64e-1 -9.31 33.0 Adju~
## 3 stride_time_me~ -5.51e+0 20.1 -0.274 7.85e-1 -46.2 35.1 Adju~
## 4 mean_cadence_s~ -1.85e-1 0.192 -0.961 3.43e-1 -0.574 0.204 Adju~
## 5 stride_width_m~ -2.18e+0 1.22 -1.79 8.07e-2 -4.64 0.280 Adju~
## 6 demoEHR_Age 2.78e-1 0.282 0.987 3.30e-1 -0.292 0.848 Adju~
## 7 demoEHR_Diseas~ -5.67e-3 0.428 -0.0133 9.89e-1 -0.872 0.861 Adju~
## 8 ms_dx_condense~ -4.25e+1 12.5 -3.41 1.57e-3 -67.8 -17.2 Adju~
## 9 ms_dx_condense~ 6.77e-1 11.6 0.0586 9.54e-1 -22.7 24.1 Adju~
## 10 race_ethnicity~ 3.52e+1 13.7 2.58 1.39e-2 7.59 62.9 Adju~
## 11 race_ethnicity~ -2.39e+1 14.6 -1.64 1.09e-1 -53.4 5.60 Adju~
## 12 race_ethnicity~ 1.11e+1 8.32 1.33 1.91e-1 -5.77 27.9 Adju~
## 13 clean_sexMale 2.56e+1 8.87 2.89 6.38e-3 7.65 43.6 Adju~
## # i 1 more variable: AdjRsquared <dbl>
# combine and plot
multivar_results <- bind_rows(unadj_all_results, adj_all_results) %>%
filter(term != "(Intercept)") # remove intercept term
adj_rsqu_df <- multivar_results[c("Model", "AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 2 x 2
## Model AdjRsquared
## <chr> <dbl>
## 1 Unadjusted 0.47
## 2 Adjusted 0.64
write.csv(adj_rsqu_df, file.path(output_dir, "PWSvel_home_allMetrics_unadjusted_vs_adjusted_adjR.csv"))
p <- adj_vs_unadj_regression_plot(multivar_results, 'Home: PWS_velocitycmsecmean Multivariate - Unadjusted vs Adjusted', 35)
p
ggsave(file.path(output_dir, "PWSvel_home_allMetrics_unadjusted_vs_adjusted.png"),
bg = "white", width= 10, height=10)
# FW ------------------------------------------
uni1 <- lm(FW_velocitycmsecmean ~ demoEHR_Age, data = zeno_fw_df)
hist(resid(uni1))
uni2 <- lm(FW_velocitycmsecmean ~ demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(uni2))
uni3 <- lm(FW_velocitycmsecmean ~ ms_dx_condensed, data = zeno_fw_df)
hist(resid(uni3))
uni4 <- lm(FW_velocitycmsecmean ~ race_ethnicity_clean, data = zeno_fw_df)
hist(resid(uni4))
uni5 <- lm(FW_velocitycmsecmean ~ clean_sex, data = zeno_fw_df)
hist(resid(uni4))
## Extract regression results from each model
results <- bind_rows(
tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "demoEHR_Age",
AdjRsquared = summary(uni1)$adj.r.squared),
tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "demoEHR_DiseaseDuration",
AdjRsquared = summary(uni2)$adj.r.squared),
tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "ms_dx_condensed",
AdjRsquared = summary(uni3)$adj.r.squared),
tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "race_ethnicity_clean",
AdjRsquared = summary(uni4)$adj.r.squared),
tidy(uni5, conf.int = TRUE) %>% mutate(Variable = "clean_sex",
AdjRsquared = summary(uni5)$adj.r.squared)) %>%
filter(term != "(Intercept)") # Remove intercept term
adj_rsqu_df <- results[c("Variable","AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 5 x 2
## Variable AdjRsquared
## <chr> <dbl>
## 1 demoEHR_Age 0.13
## 2 demoEHR_DiseaseDuration 0.01
## 3 ms_dx_condensed 0.23
## 4 race_ethnicity_clean 0.05
## 5 clean_sex 0
write.csv(adj_rsqu_df, file.path(output_dir, "FWvel_fw_demographics_univariate_regression_adjR.csv"))
# Plot
title <- "FW: FW_velocitycmsecmean ~ Demographic/MS Info"
p <- univariate_regression_plot(results, title, 35)
p
ggsave(file.path(output_dir, "FWvel_fw_demographics_univariate_regression_estimates.png"),
bg = "white", width= 10, height=10)
### Univariate video metrics
uni1 <- lm(FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv, data = zeno_fw_df)
ggplot(data = zeno_fw_df, aes(x = log_delta_pix_h_rel_median_pose_zv, y = FW_velocitycmsecmean)) + geom_point()
## Warning: Removed 4 rows containing missing values (`geom_point()`).
hist(resid(uni1))
uni2 <- lm(FW_velocitycmsecmean ~ stride_time_median_sec_pose_zv, data = zeno_fw_df)
ggplot(data = zeno_fw_df, aes(x = stride_time_median_sec_pose_zv, y = FW_velocitycmsecmean)) + geom_point()
## Warning: Removed 53 rows containing missing values (`geom_point()`).
hist(resid(uni2))
uni3 <- lm(FW_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv, data = zeno_fw_df)
ggplot(data = zeno_fw_df, aes(x = mean_cadence_step_per_min_pose_zv, y = FW_velocitycmsecmean)) + geom_point()
## Warning: Removed 46 rows containing missing values (`geom_point()`).
hist(resid(uni3))
uni4 <- lm(FW_velocitycmsecmean ~ stride_width_median_cm_pose_zv, data = zeno_fw_df)
ggplot(data = zeno_fw_df, aes(x = stride_width_median_cm_pose_zv, y = FW_velocitycmsecmean)) + geom_point()
## Warning: Removed 47 rows containing missing values (`geom_point()`).
hist(resid(uni4))
uni_results <- bind_rows(
tidy(uni1, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv",
AdjRsquared = summary(uni1)$adj.r.squared),
tidy(uni2, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv",
AdjRsquared = summary(uni2)$adj.r.squared),
tidy(uni3, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv",
AdjRsquared = summary(uni3)$adj.r.squared),
tidy(uni4, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv",
AdjRsquared = summary(uni4)$adj.r.squared)) %>%
filter(term != "(Intercept)") # Remove intercept term
uni_results <- uni_results %>% mutate(Model = "Unadjusted")
adj_rsqu_df <- uni_results[c("Variable", "AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 4 x 2
## Variable AdjRsquared
## <chr> <dbl>
## 1 log_delta_pix_h_rel_median_pose_zv 0.46
## 2 stride_time_median_sec_pose_zv 0.41
## 3 mean_cadence_step_per_min_pose_zv 0.34
## 4 stride_width_median_cm_pose_zv 0.09
write.csv(adj_rsqu_df, file.path(output_dir, "FWvel_fw_metrics_univariate_regression_adjR.csv"))
p <- univariate_regression_plot(uni_results, 'FW: FW_velocitycmsecmean ~ Metric', 35)
p
ggsave(file.path(output_dir, "FWvel_fw_metrics_univariate_regression_estimates.png"),
bg = "white", width= 10, height=10)
## age ---------------------------------------------------
con1_age <- lm(FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + demoEHR_Age, data = zeno_fw_df)
hist(resid(con1_age))
con2_age <- lm(FW_velocitycmsecmean ~ stride_time_median_sec_pose_zv + demoEHR_Age, data = zeno_fw_df)
hist(resid(con2_age))
con3_age <- lm(FW_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + demoEHR_Age, data = zeno_fw_df)
hist(resid(con3_age))
con4_age <- lm(FW_velocitycmsecmean ~ stride_width_median_cm_pose_zv + demoEHR_Age, data = zeno_fw_df)
hist(resid(con4_age))
con_age_results <- bind_rows(
tidy(con1_age, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_age, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_age, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_age, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("demoEHR_Age") # Define confounders to remove
con_age_results <- con_age_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_age_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ 51.1 3.95 12.9 1.26e-28 43.3 58.9 log_del~ Adju~
## 2 strid~ -161. 14.3 -11.3 2.70e-22 -189. -133. stride_~ Adju~
## 3 mean_~ 1.15 0.117 9.81 2.46e-18 0.916 1.38 mean_ca~ Adju~
## 4 strid~ -4.58 1.08 -4.25 3.55e- 5 -6.72 -2.45 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_age <- bind_rows(uni_results, con_age_results)
results_age
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ 55.5 4.08 13.6 6.90e-31 47.5 63.5 log_del~
## 2 stride_time~ -165. 15.3 -10.8 5.69e-21 -195. -135. stride_~
## 3 mean_cadenc~ 1.19 0.125 9.54 1.29e-17 0.944 1.44 mean_ca~
## 4 stride_widt~ -4.81 1.14 -4.21 4.11e- 5 -7.07 -2.56 stride_~
## 5 log_delta_p~ 51.1 3.95 12.9 1.26e-28 43.3 58.9 log_del~
## 6 stride_time~ -161. 14.3 -11.3 2.70e-22 -189. -133. stride_~
## 7 mean_cadenc~ 1.15 0.117 9.81 2.46e-18 0.916 1.38 mean_ca~
## 8 stride_widt~ -4.58 1.08 -4.25 3.55e- 5 -6.72 -2.45 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_age, 'FW: FW_velocitycmsecmean ~ Metric + demoEHR_Age', 35)
p
ggsave(file.path(output_dir, "FWvel_fw_metric_regression_adj_age.png"),
bg = "white", width= 10, height=10)
## disease duration -----------------------------------------------------
con1_dur <- lm(FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(con1_dur))
con2_dur <- lm(FW_velocitycmsecmean ~ stride_time_median_sec_pose_zv + demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(con2_dur))
con3_dur <- lm(FW_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(con3_dur))
con4_dur <- lm(FW_velocitycmsecmean ~ stride_width_median_cm_pose_zv + demoEHR_DiseaseDuration, data = zeno_fw_df)
hist(resid(con4_dur))
con_dur_results <- bind_rows(
tidy(con1_dur, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_dur, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_dur, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_dur, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("demoEHR_DiseaseDuration") # Define confounders to remove
con_dur_results
## # A tibble: 8 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ 55.0 4.08 13.5 2.13e-30 47.0 63.0 log_del~
## 2 demoEHR_Dis~ -0.384 0.278 -1.38 1.68e- 1 -0.932 0.164 log_del~
## 3 stride_time~ -167. 14.9 -11.2 3.84e-22 -196. -138. stride_~
## 4 demoEHR_Dis~ -0.972 0.306 -3.17 1.79e- 3 -1.58 -0.367 stride_~
## 5 mean_cadenc~ 1.21 0.123 9.78 2.99e-18 0.963 1.45 mean_ca~
## 6 demoEHR_Dis~ -0.802 0.339 -2.36 1.92e- 2 -1.47 -0.133 mean_ca~
## 7 stride_widt~ -4.66 1.15 -4.05 7.69e- 5 -6.93 -2.39 stride_~
## 8 demoEHR_Dis~ -0.471 0.405 -1.17 2.45e- 1 -1.27 0.327 stride_~
con_dur_results <- con_dur_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_dur_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ 55.0 4.08 13.5 2.13e-30 47.0 63.0 log_del~ Adju~
## 2 strid~ -167. 14.9 -11.2 3.84e-22 -196. -138. stride_~ Adju~
## 3 mean_~ 1.21 0.123 9.78 2.99e-18 0.963 1.45 mean_ca~ Adju~
## 4 strid~ -4.66 1.15 -4.05 7.69e- 5 -6.93 -2.39 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_dur <- bind_rows(uni_results, con_dur_results)
results_dur
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ 55.5 4.08 13.6 6.90e-31 47.5 63.5 log_del~
## 2 stride_time~ -165. 15.3 -10.8 5.69e-21 -195. -135. stride_~
## 3 mean_cadenc~ 1.19 0.125 9.54 1.29e-17 0.944 1.44 mean_ca~
## 4 stride_widt~ -4.81 1.14 -4.21 4.11e- 5 -7.07 -2.56 stride_~
## 5 log_delta_p~ 55.0 4.08 13.5 2.13e-30 47.0 63.0 log_del~
## 6 stride_time~ -167. 14.9 -11.2 3.84e-22 -196. -138. stride_~
## 7 mean_cadenc~ 1.21 0.123 9.78 2.99e-18 0.963 1.45 mean_ca~
## 8 stride_widt~ -4.66 1.15 -4.05 7.69e- 5 -6.93 -2.39 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dur, 'FW: FW_velocitycmsecmean ~ Metric + demoEHR_DiseaseDuration', 35)
p
ggsave(file.path(output_dir, "FWvel_fw_metric_regression_adj_duration.png"),
bg = "white", width= 10, height=10)
## MS DX ---------------------------------------------------------------
con1_dx <- lm(FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + ms_dx_condensed, data = zeno_fw_df)
hist(resid(con1_dx))
con2_dx <- lm(FW_velocitycmsecmean ~ stride_time_median_sec_pose_zv + ms_dx_condensed, data = zeno_fw_df)
hist(resid(con2_dx))
con3_dx <- lm(FW_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + ms_dx_condensed, data = zeno_fw_df)
hist(resid(con3_dx))
con4_dx <- lm(FW_velocitycmsecmean ~ stride_width_median_cm_pose_zv + ms_dx_condensed, data = zeno_fw_df)
hist(resid(con4_dx))
con_dx_results <- bind_rows(
tidy(con1_dx, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_dx, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_dx, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_dx, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("ms_dx_condensed") # Define confounders to remove
con_dx_results
## # A tibble: 12 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_~ 47.0 4.10 11.5 5.07e-24 39.0 55.1 log_del~
## 2 ms_dx_cond~ -33.0 5.85 -5.64 5.22e- 8 -44.5 -21.5 log_del~
## 3 ms_dx_cond~ -25.8 30.8 -0.835 4.04e- 1 -86.6 35.0 log_del~
## 4 stride_tim~ -146. 16.2 -9.01 5.04e-16 -178. -114. stride_~
## 5 ms_dx_cond~ -23.3 7.62 -3.06 2.58e- 3 -38.4 -8.28 stride_~
## 6 ms_dx_cond~ -19.1 23.0 -0.830 4.08e- 1 -64.6 26.4 stride_~
## 7 mean_caden~ 0.994 0.125 7.96 2.17e-13 0.748 1.24 mean_ca~
## 8 ms_dx_cond~ -36.4 7.53 -4.83 3.01e- 6 -51.2 -21.5 mean_ca~
## 9 ms_dx_cond~ -23.7 24.5 -0.968 3.35e- 1 -72.1 24.7 mean_ca~
## 10 stride_wid~ -3.81 1.05 -3.63 3.68e- 4 -5.87 -1.74 stride_~
## 11 ms_dx_cond~ -52.5 8.03 -6.53 7.12e-10 -68.3 -36.6 stride_~
## 12 ms_dx_cond~ -5.90 27.6 -0.214 8.31e- 1 -60.3 48.5 stride_~
con_dx_results <- con_dx_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_dx_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ 47.0 4.10 11.5 5.07e-24 39.0 55.1 log_del~ Adju~
## 2 strid~ -146. 16.2 -9.01 5.04e-16 -178. -114. stride_~ Adju~
## 3 mean_~ 0.994 0.125 7.96 2.17e-13 0.748 1.24 mean_ca~ Adju~
## 4 strid~ -3.81 1.05 -3.63 3.68e- 4 -5.87 -1.74 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_dx <- bind_rows(uni_results, con_dx_results)
results_dx
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ 55.5 4.08 13.6 6.90e-31 47.5 63.5 log_del~
## 2 stride_time~ -165. 15.3 -10.8 5.69e-21 -195. -135. stride_~
## 3 mean_cadenc~ 1.19 0.125 9.54 1.29e-17 0.944 1.44 mean_ca~
## 4 stride_widt~ -4.81 1.14 -4.21 4.11e- 5 -7.07 -2.56 stride_~
## 5 log_delta_p~ 47.0 4.10 11.5 5.07e-24 39.0 55.1 log_del~
## 6 stride_time~ -146. 16.2 -9.01 5.04e-16 -178. -114. stride_~
## 7 mean_cadenc~ 0.994 0.125 7.96 2.17e-13 0.748 1.24 mean_ca~
## 8 stride_widt~ -3.81 1.05 -3.63 3.68e- 4 -5.87 -1.74 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_dx, 'FW: FW_velocitycmsecmean ~ Metric + MS DX', 35)
p
ggsave(file.path(output_dir, "FWvel_fw_metric_regression_adj_MSDX.png"),
bg = "white", width= 10, height=10)
## race_ethnicity -----------------------------------------------------
con1_re <- lm(FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + race_ethnicity_clean, data = zeno_fw_df)
hist(resid(con1_re))
con2_re <- lm(FW_velocitycmsecmean ~ stride_time_median_sec_pose_zv + race_ethnicity_clean, data = zeno_fw_df)
hist(resid(con2_re))
con3_re <- lm(FW_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + race_ethnicity_clean, data = zeno_fw_df)
hist(resid(con3_re))
con4_re <- lm(FW_velocitycmsecmean ~ stride_width_median_cm_pose_zv + race_ethnicity_clean, data = zeno_fw_df)
hist(resid(con4_re))
con_re_results <- bind_rows(
tidy(con1_re, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_re, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_re, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_re, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("race_ethnicity_clean") # Define confounders to remove
con_re_results
## # A tibble: 20 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_~ 53.9 4.04 13.3 7.40e-30 46.0 61.9 log_del~
## 2 race_ethni~ -3.48 8.47 -0.410 6.82e- 1 -20.2 13.2 log_del~
## 3 race_ethni~ -26.4 9.08 -2.91 3.98e- 3 -44.3 -8.53 log_del~
## 4 race_ethni~ 6.90 7.86 0.877 3.81e- 1 -8.60 22.4 log_del~
## 5 race_ethni~ 11.3 8.04 1.41 1.60e- 1 -4.51 27.2 log_del~
## 6 stride_tim~ -157. 15.1 -10.4 1.16e-19 -186. -127. stride_~
## 7 race_ethni~ -2.53 9.17 -0.276 7.83e- 1 -20.6 15.6 stride_~
## 8 race_ethni~ -34.5 9.90 -3.48 6.34e- 4 -54.0 -14.9 stride_~
## 9 race_ethni~ -4.32 8.61 -0.502 6.16e- 1 -21.3 12.7 stride_~
## 10 race_ethni~ 5.25 9.79 0.537 5.92e- 1 -14.1 24.6 stride_~
## 11 mean_caden~ 1.16 0.122 9.47 2.31e-17 0.916 1.40 mean_ca~
## 12 race_ethni~ -1.17 10.0 -0.116 9.08e- 1 -21.0 18.7 mean_ca~
## 13 race_ethni~ -39.2 10.8 -3.64 3.58e- 4 -60.5 -18.0 mean_ca~
## 14 race_ethni~ -4.50 9.20 -0.489 6.25e- 1 -22.7 13.7 mean_ca~
## 15 race_ethni~ 6.91 10.8 0.642 5.21e- 1 -14.3 28.1 mean_ca~
## 16 stride_wid~ -4.11 1.18 -3.49 6.21e- 4 -6.44 -1.78 stride_~
## 17 race_ethni~ -0.977 12.1 -0.0809 9.36e- 1 -24.8 22.9 stride_~
## 18 race_ethni~ -38.3 13.0 -2.94 3.70e- 3 -63.9 -12.6 stride_~
## 19 race_ethni~ -2.16 10.9 -0.198 8.44e- 1 -23.7 19.4 stride_~
## 20 race_ethni~ 4.50 12.9 0.349 7.27e- 1 -20.9 29.9 stride_~
con_re_results <- con_re_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_re_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ 53.9 4.04 13.3 7.40e-30 46.0 61.9 log_del~ Adju~
## 2 strid~ -157. 15.1 -10.4 1.16e-19 -186. -127. stride_~ Adju~
## 3 mean_~ 1.16 0.122 9.47 2.31e-17 0.916 1.40 mean_ca~ Adju~
## 4 strid~ -4.11 1.18 -3.49 6.21e- 4 -6.44 -1.78 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_re <- bind_rows(uni_results, con_re_results)
results_re
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ 55.5 4.08 13.6 6.90e-31 47.5 63.5 log_del~
## 2 stride_time~ -165. 15.3 -10.8 5.69e-21 -195. -135. stride_~
## 3 mean_cadenc~ 1.19 0.125 9.54 1.29e-17 0.944 1.44 mean_ca~
## 4 stride_widt~ -4.81 1.14 -4.21 4.11e- 5 -7.07 -2.56 stride_~
## 5 log_delta_p~ 53.9 4.04 13.3 7.40e-30 46.0 61.9 log_del~
## 6 stride_time~ -157. 15.1 -10.4 1.16e-19 -186. -127. stride_~
## 7 mean_cadenc~ 1.16 0.122 9.47 2.31e-17 0.916 1.40 mean_ca~
## 8 stride_widt~ -4.11 1.18 -3.49 6.21e- 4 -6.44 -1.78 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_re, 'FW: FW_velocitycmsecmean~ Metric + race_ethnicity_clean', 35)
p
ggsave(file.path(output_dir, "FWvel_fw_metric_regression_adj_RaceEthnicity.png"),
bg = "white", width= 10, height=10)
## sex -------------------------------------------------------------------
con1_sex <- lm(FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv + clean_sex, data = zeno_fw_df)
hist(resid(con1_sex))
con2_sex <- lm(FW_velocitycmsecmean ~ stride_time_median_sec_pose_zv + clean_sex, data = zeno_fw_df)
hist(resid(con2_sex))
con3_sex <- lm(FW_velocitycmsecmean ~ mean_cadence_step_per_min_pose_zv + clean_sex, data = zeno_fw_df)
hist(resid(con3_sex))
con4_sex <- lm(FW_velocitycmsecmean ~ stride_width_median_cm_pose_zv + clean_sex, data = zeno_fw_df)
hist(resid(con4_sex))
con_sex_results <- bind_rows(
tidy(con1_sex, conf.int = TRUE) %>% mutate(Variable = "log_delta_pix_h_rel_median_pose_zv"),
tidy(con2_sex, conf.int = TRUE) %>% mutate(Variable = "stride_time_median_sec_pose_zv"),
tidy(con3_sex, conf.int = TRUE) %>% mutate(Variable = "mean_cadence_step_per_min_pose_zv"),
tidy(con4_sex, conf.int = TRUE) %>% mutate(Variable = "stride_width_median_cm_pose_zv")) %>%
filter(term != "(Intercept)") # Remove intercept term
confounders <- c("clean_sex") # Define confounders to remove
con_sex_results
## # A tibble: 12 x 8
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_~ 55.1 4.09 13.5 2.31e-30 47.1 63.2 log_del~
## 2 clean_sexM~ 5.32 5.32 1.00 3.18e- 1 -5.16 15.8 log_del~
## 3 clean_sexN~ 14.7 23.4 0.629 5.30e- 1 -31.4 60.9 log_del~
## 4 stride_tim~ -165. 15.2 -10.9 4.08e-21 -195. -135. stride_~
## 5 clean_sexM~ 8.88 5.84 1.52 1.30e- 1 -2.65 20.4 stride_~
## 6 clean_sexN~ 26.1 23.5 1.11 2.69e- 1 -20.3 72.5 stride_~
## 7 mean_caden~ 1.19 0.124 9.57 1.13e-17 0.945 1.44 mean_ca~
## 8 clean_sexM~ 11.8 6.39 1.85 6.58e- 2 -0.782 24.4 mean_ca~
## 9 clean_sexN~ 17.0 25.9 0.656 5.13e- 1 -34.2 68.2 mean_ca~
## 10 stride_wid~ -5.13 1.14 -4.48 1.34e- 5 -7.39 -2.87 stride_~
## 11 clean_sexM~ 14.5 7.52 1.93 5.57e- 2 -0.355 29.4 stride_~
## 12 clean_sexN~ 35.1 30.2 1.16 2.47e- 1 -24.6 94.8 stride_~
con_sex_results <- con_sex_results %>%
filter(!grepl(confounders, term)) %>% # Remove confounders
mutate(Model = "Adjusted")
con_sex_results
## # A tibble: 4 x 9
## term estimate std.error statistic p.value conf.low conf.high Variable Model
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 log_d~ 55.1 4.09 13.5 2.31e-30 47.1 63.2 log_del~ Adju~
## 2 strid~ -165. 15.2 -10.9 4.08e-21 -195. -135. stride_~ Adju~
## 3 mean_~ 1.19 0.124 9.57 1.13e-17 0.945 1.44 mean_ca~ Adju~
## 4 strid~ -5.13 1.14 -4.48 1.34e- 5 -7.39 -2.87 stride_~ Adju~
# plot adjusted for age vs not adjusted univariate
# bind all together to single results df
results_sex <- bind_rows(uni_results, con_sex_results)
results_sex
## # A tibble: 8 x 10
## term estimate std.error statistic p.value conf.low conf.high Variable
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 log_delta_p~ 55.5 4.08 13.6 6.90e-31 47.5 63.5 log_del~
## 2 stride_time~ -165. 15.3 -10.8 5.69e-21 -195. -135. stride_~
## 3 mean_cadenc~ 1.19 0.125 9.54 1.29e-17 0.944 1.44 mean_ca~
## 4 stride_widt~ -4.81 1.14 -4.21 4.11e- 5 -7.07 -2.56 stride_~
## 5 log_delta_p~ 55.1 4.09 13.5 2.31e-30 47.1 63.2 log_del~
## 6 stride_time~ -165. 15.2 -10.9 4.08e-21 -195. -135. stride_~
## 7 mean_cadenc~ 1.19 0.124 9.57 1.13e-17 0.945 1.44 mean_ca~
## 8 stride_widt~ -5.13 1.14 -4.48 1.34e- 5 -7.39 -2.87 stride_~
## # i 2 more variables: AdjRsquared <dbl>, Model <chr>
p <- adj_vs_unadj_regression_plot(results_sex, 'FW: FW_velocitycmsecmean ~ Metric + Sex', 35)
p
ggsave(file.path(output_dir, "FWvel_fw_metric_regression_adj_sex.png"),
bg = "white", width= 10, height=10)
Metrics only - not including double support/stance measures, too many missing. May include after improving code
# FW
# Unadjusted
all_unadj <- lm(FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv +
stride_time_median_sec_pose_zv +
mean_cadence_step_per_min_pose_zv +
stride_width_median_cm_pose_zv,
data = zeno_fw_df)
summary(all_unadj)
##
## Call:
## lm(formula = FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv +
## stride_time_median_sec_pose_zv + mean_cadence_step_per_min_pose_zv +
## stride_width_median_cm_pose_zv, data = zeno_fw_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -77.926 -16.952 -0.967 17.011 75.803
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 252.9380 36.9169 6.852 1.45e-10 ***
## log_delta_pix_h_rel_median_pose_zv 32.7833 4.9370 6.640 4.50e-10 ***
## stride_time_median_sec_pose_zv -80.2188 20.0400 -4.003 9.50e-05 ***
## mean_cadence_step_per_min_pose_zv 0.2778 0.1695 1.639 0.1031
## stride_width_median_cm_pose_zv -2.1265 0.8317 -2.557 0.0115 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27.97 on 162 degrees of freedom
## (55 observations deleted due to missingness)
## Multiple R-squared: 0.5885, Adjusted R-squared: 0.5784
## F-statistic: 57.93 on 4 and 162 DF, p-value: < 2.2e-16
hist(resid(all_unadj))
# Adjusted
all_adjusted <- lm(FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv +
stride_time_median_sec_pose_zv +
mean_cadence_step_per_min_pose_zv +
stride_width_median_cm_pose_zv +
demoEHR_Age +
demoEHR_DiseaseDuration +
ms_dx_condensed +
race_ethnicity_clean +
clean_sex,
data = zeno_fw_df)
summary(all_adjusted)
##
## Call:
## lm(formula = FW_velocitycmsecmean ~ log_delta_pix_h_rel_median_pose_zv +
## stride_time_median_sec_pose_zv + mean_cadence_step_per_min_pose_zv +
## stride_width_median_cm_pose_zv + demoEHR_Age + demoEHR_DiseaseDuration +
## ms_dx_condensed + race_ethnicity_clean + clean_sex, data = zeno_fw_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -83.084 -13.704 0.475 14.709 62.648
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 279.05497 34.97490 7.979
## log_delta_pix_h_rel_median_pose_zv 26.43804 4.59993 5.747
## stride_time_median_sec_pose_zv -70.43068 18.30922 -3.847
## mean_cadence_step_per_min_pose_zv 0.30823 0.15484 1.991
## stride_width_median_cm_pose_zv -1.77231 0.78927 -2.246
## demoEHR_Age -0.94283 0.20187 -4.671
## demoEHR_DiseaseDuration 0.09378 0.28616 0.328
## ms_dx_condensedProgressive MS -9.57995 6.43577 -1.489
## ms_dx_condensedMS, Subtype Not Specified -21.09913 25.73336 -0.820
## race_ethnicity_cleanAsian -10.01438 7.78829 -1.286
## race_ethnicity_cleanBlack Or African American -31.79119 7.89517 -4.027
## race_ethnicity_cleanHispanic or Latino -10.52545 7.04757 -1.493
## race_ethnicity_cleanOther/Unknown/Declined -6.10940 8.14599 -0.750
## clean_sexMale 8.29111 4.72665 1.754
## clean_sexNon-Binary 9.88749 18.06623 0.547
## Pr(>|t|)
## (Intercept) 3.33e-13 ***
## log_delta_pix_h_rel_median_pose_zv 4.80e-08 ***
## stride_time_median_sec_pose_zv 0.000176 ***
## mean_cadence_step_per_min_pose_zv 0.048323 *
## stride_width_median_cm_pose_zv 0.026178 *
## demoEHR_Age 6.56e-06 ***
## demoEHR_DiseaseDuration 0.743569
## ms_dx_condensedProgressive MS 0.138678
## ms_dx_condensedMS, Subtype Not Specified 0.413549
## race_ethnicity_cleanAsian 0.200459
## race_ethnicity_cleanBlack Or African American 8.90e-05 ***
## race_ethnicity_cleanHispanic or Latino 0.137383
## race_ethnicity_cleanOther/Unknown/Declined 0.454421
## clean_sexMale 0.081426 .
## clean_sexNon-Binary 0.584981
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.81 on 152 degrees of freedom
## (55 observations deleted due to missingness)
## Multiple R-squared: 0.6963, Adjusted R-squared: 0.6684
## F-statistic: 24.9 on 14 and 152 DF, p-value: < 2.2e-16
hist(resid(all_adjusted))
# format for plotting
# Unadjusted
unadj_all_results <- tidy(all_unadj, conf.int = TRUE) %>%
mutate(Model = "Unadjusted",
AdjRsquared = summary(all_unadj)$adj.r.squared)
# Adjusted
adj_all_results <- tidy(all_adjusted, conf.int = TRUE) %>%
mutate(Model = "Adjusted",
AdjRsquared = summary(all_adjusted)$adj.r.squared)
# combine and plot
multivar_results <- bind_rows(unadj_all_results, adj_all_results) %>%
filter(term != "(Intercept)") # Remove intercept term
adj_rsqu_df <- multivar_results[c("Model", "AdjRsquared")] %>%
distinct() # remove duplicates
adj_rsqu_df$AdjRsquared <- round(adj_rsqu_df$AdjRsquared, 2)
adj_rsqu_df
## # A tibble: 2 x 2
## Model AdjRsquared
## <chr> <dbl>
## 1 Unadjusted 0.58
## 2 Adjusted 0.67
write.csv(adj_rsqu_df, file.path(output_dir, "FWvel_fw_metrics_univariate_regression_adjR.csv"))
p <- adj_vs_unadj_regression_plot(multivar_results, 'FW: FW_velocitycmsecmean Multivariate - Unadjusted vs Adjusted', 35)
p
ggsave(file.path(output_dir, "FWvel_fw_allMetrics_unadjusted_vs_adjusted.png"),
bg = "white", width= 10, height=10)